#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.offline as py
import plotly.offline as offline
import plotly.graph_objects as go
import cufflinks as cf
cf.go_offline()
from plotly.offline import init_notebook_mode, iplot
import warnings
warnings.filterwarnings('ignore')
# Restore pandas' default limit on the number of columns shown when a frame is
# displayed. reset_option() takes only the option name; the extra `None`
# passed previously was bound to an unrelated parameter and may be rejected
# outright by pandas versions whose reset_option accepts a single argument.
# NOTE(review): if the goal was to display *all* columns, the call should be
# pd.set_option('display.max_columns', None) instead -- confirm intent.
pd.reset_option('display.max_columns')
# Read every CSV table used below from the working directory, in a fixed
# order, and bind each one to its short alias.
(
    app_train,  # current applications (includes the TARGET column)
    pos_bal,    # POS/cash balance rows (MONTHS_BALANCE per SK_ID_PREV / SK_ID_CURR)
    bure_bal,   # bureau balance rows (MONTHS_BALANCE and STATUS per SK_ID_BUREAU)
    prev_app,   # previous applications
    inst_pay,   # installment payments
    cred_bal,   # credit card balance rows
    bure,       # bureau credit records
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'application_train.csv',
        'POS_CASH_balance.csv',
        'bureau_balance.csv',
        'previous_application.csv',
        'installments_payments.csv',
        'credit_card_balance.csv',
        'bureau.csv',
    )
)
# Print the (rows, columns) shape of each loaded table, one per line, in the
# same order the tables were loaded.
print(
    *(
        table.shape
        for table in (app_train, pos_bal, bure_bal, prev_app, inst_pay, cred_bal, bure)
    ),
    sep='\n',
)
(307511, 122) (10001358, 8) (27299925, 3) (1670214, 37) (13605401, 8) (3840312, 23) (1716428, 17)
app_train.head()#Getting the first five rows
| SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | ... | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | ... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | ... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | ... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | ... | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | ... | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 rows × 122 columns
app_train.columns.values
array(['SK_ID_CURR', 'TARGET', 'NAME_CONTRACT_TYPE', 'CODE_GENDER',
'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN',
'AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE',
'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE',
'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH', 'DAYS_EMPLOYED',
'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE',
'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE',
'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE',
'CNT_FAM_MEMBERS', 'REGION_RATING_CLIENT',
'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START',
'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION',
'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION',
'REG_CITY_NOT_LIVE_CITY', 'REG_CITY_NOT_WORK_CITY',
'LIVE_CITY_NOT_WORK_CITY', 'ORGANIZATION_TYPE', 'EXT_SOURCE_1',
'EXT_SOURCE_2', 'EXT_SOURCE_3', 'APARTMENTS_AVG',
'BASEMENTAREA_AVG', 'YEARS_BEGINEXPLUATATION_AVG',
'YEARS_BUILD_AVG', 'COMMONAREA_AVG', 'ELEVATORS_AVG',
'ENTRANCES_AVG', 'FLOORSMAX_AVG', 'FLOORSMIN_AVG', 'LANDAREA_AVG',
'LIVINGAPARTMENTS_AVG', 'LIVINGAREA_AVG',
'NONLIVINGAPARTMENTS_AVG', 'NONLIVINGAREA_AVG', 'APARTMENTS_MODE',
'BASEMENTAREA_MODE', 'YEARS_BEGINEXPLUATATION_MODE',
'YEARS_BUILD_MODE', 'COMMONAREA_MODE', 'ELEVATORS_MODE',
'ENTRANCES_MODE', 'FLOORSMAX_MODE', 'FLOORSMIN_MODE',
'LANDAREA_MODE', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_MODE',
'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MODE',
'APARTMENTS_MEDI', 'BASEMENTAREA_MEDI',
'YEARS_BEGINEXPLUATATION_MEDI', 'YEARS_BUILD_MEDI',
'COMMONAREA_MEDI', 'ELEVATORS_MEDI', 'ENTRANCES_MEDI',
'FLOORSMAX_MEDI', 'FLOORSMIN_MEDI', 'LANDAREA_MEDI',
'LIVINGAPARTMENTS_MEDI', 'LIVINGAREA_MEDI',
'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAREA_MEDI',
'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'TOTALAREA_MODE',
'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE',
'OBS_30_CNT_SOCIAL_CIRCLE', 'DEF_30_CNT_SOCIAL_CIRCLE',
'OBS_60_CNT_SOCIAL_CIRCLE', 'DEF_60_CNT_SOCIAL_CIRCLE',
'DAYS_LAST_PHONE_CHANGE', 'FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_3',
'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_6',
'FLAG_DOCUMENT_7', 'FLAG_DOCUMENT_8', 'FLAG_DOCUMENT_9',
'FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11', 'FLAG_DOCUMENT_12',
'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_14', 'FLAG_DOCUMENT_15',
'FLAG_DOCUMENT_16', 'FLAG_DOCUMENT_17', 'FLAG_DOCUMENT_18',
'FLAG_DOCUMENT_19', 'FLAG_DOCUMENT_20', 'FLAG_DOCUMENT_21',
'AMT_REQ_CREDIT_BUREAU_HOUR', 'AMT_REQ_CREDIT_BUREAU_DAY',
'AMT_REQ_CREDIT_BUREAU_WEEK', 'AMT_REQ_CREDIT_BUREAU_MON',
'AMT_REQ_CREDIT_BUREAU_QRT', 'AMT_REQ_CREDIT_BUREAU_YEAR'],
dtype=object)
app_train.describe()#Used to view some basic statistical details like percentile,mean,std etc
| SK_ID_CURR | TARGET | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | ... | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 307511.000000 | 307511.000000 | 307511.000000 | 3.075110e+05 | 3.075110e+05 | 307499.000000 | 3.072330e+05 | 307511.000000 | 307511.000000 | 307511.000000 | ... | 307511.000000 | 307511.000000 | 307511.000000 | 307511.000000 | 265992.000000 | 265992.000000 | 265992.000000 | 265992.000000 | 265992.000000 | 265992.000000 |
| mean | 278180.518577 | 0.080729 | 0.417052 | 1.687979e+05 | 5.990260e+05 | 27108.573909 | 5.383962e+05 | 0.020868 | -16036.995067 | 63815.045904 | ... | 0.008130 | 0.000595 | 0.000507 | 0.000335 | 0.006402 | 0.007000 | 0.034362 | 0.267395 | 0.265474 | 1.899974 |
| std | 102790.175348 | 0.272419 | 0.722121 | 2.371231e+05 | 4.024908e+05 | 14493.737315 | 3.694465e+05 | 0.013831 | 4363.988632 | 141275.766519 | ... | 0.089798 | 0.024387 | 0.022518 | 0.018299 | 0.083849 | 0.110757 | 0.204685 | 0.916002 | 0.794056 | 1.869295 |
| min | 100002.000000 | 0.000000 | 0.000000 | 2.565000e+04 | 4.500000e+04 | 1615.500000 | 4.050000e+04 | 0.000290 | -25229.000000 | -17912.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 189145.500000 | 0.000000 | 0.000000 | 1.125000e+05 | 2.700000e+05 | 16524.000000 | 2.385000e+05 | 0.010006 | -19682.000000 | -2760.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 278202.000000 | 0.000000 | 0.000000 | 1.471500e+05 | 5.135310e+05 | 24903.000000 | 4.500000e+05 | 0.018850 | -15750.000000 | -1213.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 75% | 367142.500000 | 0.000000 | 1.000000 | 2.025000e+05 | 8.086500e+05 | 34596.000000 | 6.795000e+05 | 0.028663 | -12413.000000 | -289.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 3.000000 |
| max | 456255.000000 | 1.000000 | 19.000000 | 1.170000e+08 | 4.050000e+06 | 258025.500000 | 4.050000e+06 | 0.072508 | -7489.000000 | 365243.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 4.000000 | 9.000000 | 8.000000 | 27.000000 | 261.000000 | 25.000000 |
8 rows × 106 columns
# List the dtype and non-null count of every column. A bare info() prints only
# the dtype summary for this frame because it has more columns than pandas'
# default display.max_info_columns, so the full per-column listing and the
# counts are requested explicitly.
app_train.info(verbose=True, show_counts=True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 307511 entries, 0 to 307510 Columns: 122 entries, SK_ID_CURR to AMT_REQ_CREDIT_BUREAU_YEAR dtypes: float64(65), int64(41), object(16) memory usage: 286.2+ MB
pos_bal.head()#Getting the first five rows
| SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | CNT_INSTALMENT | CNT_INSTALMENT_FUTURE | NAME_CONTRACT_STATUS | SK_DPD | SK_DPD_DEF | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1803195 | 182943 | -31 | 48.0 | 45.0 | Active | 0 | 0 |
| 1 | 1715348 | 367990 | -33 | 36.0 | 35.0 | Active | 0 | 0 |
| 2 | 1784872 | 397406 | -32 | 12.0 | 9.0 | Active | 0 | 0 |
| 3 | 1903291 | 269225 | -35 | 48.0 | 42.0 | Active | 0 | 0 |
| 4 | 2341044 | 334279 | -35 | 36.0 | 35.0 | Active | 0 | 0 |
pos_bal.columns.values
array(['SK_ID_PREV', 'SK_ID_CURR', 'MONTHS_BALANCE', 'CNT_INSTALMENT',
'CNT_INSTALMENT_FUTURE', 'NAME_CONTRACT_STATUS', 'SK_DPD',
'SK_DPD_DEF'], dtype=object)
pos_bal.describe()#Used to view some basic statistical details like percentile,mean,std etc.
| SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | CNT_INSTALMENT | CNT_INSTALMENT_FUTURE | SK_DPD | SK_DPD_DEF | |
|---|---|---|---|---|---|---|---|
| count | 1.000136e+07 | 1.000136e+07 | 1.000136e+07 | 9.975287e+06 | 9.975271e+06 | 1.000136e+07 | 1.000136e+07 |
| mean | 1.903217e+06 | 2.784039e+05 | -3.501259e+01 | 1.708965e+01 | 1.048384e+01 | 1.160693e+01 | 6.544684e-01 |
| std | 5.358465e+05 | 1.027637e+05 | 2.606657e+01 | 1.199506e+01 | 1.110906e+01 | 1.327140e+02 | 3.276249e+01 |
| min | 1.000001e+06 | 1.000010e+05 | -9.600000e+01 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 25% | 1.434405e+06 | 1.895500e+05 | -5.400000e+01 | 1.000000e+01 | 3.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 50% | 1.896565e+06 | 2.786540e+05 | -2.800000e+01 | 1.200000e+01 | 7.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 75% | 2.368963e+06 | 3.674290e+05 | -1.300000e+01 | 2.400000e+01 | 1.400000e+01 | 0.000000e+00 | 0.000000e+00 |
| max | 2.843499e+06 | 4.562550e+05 | -1.000000e+00 | 9.200000e+01 | 8.500000e+01 | 4.231000e+03 | 3.595000e+03 |
# List the dtype and non-null count of every column. pandas omits the
# non-null counts by default for frames with more rows than
# display.max_info_rows, so they are requested explicitly here.
pos_bal.info(show_counts=True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10001358 entries, 0 to 10001357 Data columns (total 8 columns): # Column Dtype --- ------ ----- 0 SK_ID_PREV int64 1 SK_ID_CURR int64 2 MONTHS_BALANCE int64 3 CNT_INSTALMENT float64 4 CNT_INSTALMENT_FUTURE float64 5 NAME_CONTRACT_STATUS object 6 SK_DPD int64 7 SK_DPD_DEF int64 dtypes: float64(2), int64(5), object(1) memory usage: 610.4+ MB
bure_bal.head()#Getting the first five rows
| SK_ID_BUREAU | MONTHS_BALANCE | STATUS | |
|---|---|---|---|
| 0 | 5715448 | 0 | C |
| 1 | 5715448 | -1 | C |
| 2 | 5715448 | -2 | C |
| 3 | 5715448 | -3 | C |
| 4 | 5715448 | -4 | C |
bure_bal.columns.values
array(['SK_ID_BUREAU', 'MONTHS_BALANCE', 'STATUS'], dtype=object)
bure_bal.describe()#Used to view some basic statistical details like percentile,mean,std etc.
| SK_ID_BUREAU | MONTHS_BALANCE | |
|---|---|---|
| count | 2.729992e+07 | 2.729992e+07 |
| mean | 6.036297e+06 | -3.074169e+01 |
| std | 4.923489e+05 | 2.386451e+01 |
| min | 5.001709e+06 | -9.600000e+01 |
| 25% | 5.730933e+06 | -4.600000e+01 |
| 50% | 6.070821e+06 | -2.500000e+01 |
| 75% | 6.431951e+06 | -1.100000e+01 |
| max | 6.842888e+06 | 0.000000e+00 |
# List the dtype and non-null count of every column. pandas omits the
# non-null counts by default for frames with more rows than
# display.max_info_rows, so they are requested explicitly here.
bure_bal.info(show_counts=True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 27299925 entries, 0 to 27299924 Data columns (total 3 columns): # Column Dtype --- ------ ----- 0 SK_ID_BUREAU int64 1 MONTHS_BALANCE int64 2 STATUS object dtypes: int64(2), object(1) memory usage: 624.8+ MB
prev_app.head()#Getting the first five rows
| SK_ID_PREV | SK_ID_CURR | NAME_CONTRACT_TYPE | AMT_ANNUITY | AMT_APPLICATION | AMT_CREDIT | AMT_DOWN_PAYMENT | AMT_GOODS_PRICE | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | ... | NAME_SELLER_INDUSTRY | CNT_PAYMENT | NAME_YIELD_GROUP | PRODUCT_COMBINATION | DAYS_FIRST_DRAWING | DAYS_FIRST_DUE | DAYS_LAST_DUE_1ST_VERSION | DAYS_LAST_DUE | DAYS_TERMINATION | NFLAG_INSURED_ON_APPROVAL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2030495 | 271877 | Consumer loans | 1730.430 | 17145.0 | 17145.0 | 0.0 | 17145.0 | SATURDAY | 15 | ... | Connectivity | 12.0 | middle | POS mobile with interest | 365243.0 | -42.0 | 300.0 | -42.0 | -37.0 | 0.0 |
| 1 | 2802425 | 108129 | Cash loans | 25188.615 | 607500.0 | 679671.0 | NaN | 607500.0 | THURSDAY | 11 | ... | XNA | 36.0 | low_action | Cash X-Sell: low | 365243.0 | -134.0 | 916.0 | 365243.0 | 365243.0 | 1.0 |
| 2 | 2523466 | 122040 | Cash loans | 15060.735 | 112500.0 | 136444.5 | NaN | 112500.0 | TUESDAY | 11 | ... | XNA | 12.0 | high | Cash X-Sell: high | 365243.0 | -271.0 | 59.0 | 365243.0 | 365243.0 | 1.0 |
| 3 | 2819243 | 176158 | Cash loans | 47041.335 | 450000.0 | 470790.0 | NaN | 450000.0 | MONDAY | 7 | ... | XNA | 12.0 | middle | Cash X-Sell: middle | 365243.0 | -482.0 | -152.0 | -182.0 | -177.0 | 1.0 |
| 4 | 1784265 | 202054 | Cash loans | 31924.395 | 337500.0 | 404055.0 | NaN | 337500.0 | THURSDAY | 9 | ... | XNA | 24.0 | high | Cash Street: high | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 37 columns
prev_app.columns.values
array(['SK_ID_PREV', 'SK_ID_CURR', 'NAME_CONTRACT_TYPE', 'AMT_ANNUITY',
'AMT_APPLICATION', 'AMT_CREDIT', 'AMT_DOWN_PAYMENT',
'AMT_GOODS_PRICE', 'WEEKDAY_APPR_PROCESS_START',
'HOUR_APPR_PROCESS_START', 'FLAG_LAST_APPL_PER_CONTRACT',
'NFLAG_LAST_APPL_IN_DAY', 'RATE_DOWN_PAYMENT',
'RATE_INTEREST_PRIMARY', 'RATE_INTEREST_PRIVILEGED',
'NAME_CASH_LOAN_PURPOSE', 'NAME_CONTRACT_STATUS', 'DAYS_DECISION',
'NAME_PAYMENT_TYPE', 'CODE_REJECT_REASON', 'NAME_TYPE_SUITE',
'NAME_CLIENT_TYPE', 'NAME_GOODS_CATEGORY', 'NAME_PORTFOLIO',
'NAME_PRODUCT_TYPE', 'CHANNEL_TYPE', 'SELLERPLACE_AREA',
'NAME_SELLER_INDUSTRY', 'CNT_PAYMENT', 'NAME_YIELD_GROUP',
'PRODUCT_COMBINATION', 'DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE',
'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_LAST_DUE', 'DAYS_TERMINATION',
'NFLAG_INSURED_ON_APPROVAL'], dtype=object)
prev_app.describe()#Used to view some basic statistical details like percentile,mean,std etc.
| SK_ID_PREV | SK_ID_CURR | AMT_ANNUITY | AMT_APPLICATION | AMT_CREDIT | AMT_DOWN_PAYMENT | AMT_GOODS_PRICE | HOUR_APPR_PROCESS_START | NFLAG_LAST_APPL_IN_DAY | RATE_DOWN_PAYMENT | ... | RATE_INTEREST_PRIVILEGED | DAYS_DECISION | SELLERPLACE_AREA | CNT_PAYMENT | DAYS_FIRST_DRAWING | DAYS_FIRST_DUE | DAYS_LAST_DUE_1ST_VERSION | DAYS_LAST_DUE | DAYS_TERMINATION | NFLAG_INSURED_ON_APPROVAL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.670214e+06 | 1.670214e+06 | 1.297979e+06 | 1.670214e+06 | 1.670213e+06 | 7.743700e+05 | 1.284699e+06 | 1.670214e+06 | 1.670214e+06 | 774370.000000 | ... | 5951.000000 | 1.670214e+06 | 1.670214e+06 | 1.297984e+06 | 997149.000000 | 997149.000000 | 997149.000000 | 997149.000000 | 997149.000000 | 997149.000000 |
| mean | 1.923089e+06 | 2.783572e+05 | 1.595512e+04 | 1.752339e+05 | 1.961140e+05 | 6.697402e+03 | 2.278473e+05 | 1.248418e+01 | 9.964675e-01 | 0.079637 | ... | 0.773503 | -8.806797e+02 | 3.139511e+02 | 1.605408e+01 | 342209.855039 | 13826.269337 | 33767.774054 | 76582.403064 | 81992.343838 | 0.332570 |
| std | 5.325980e+05 | 1.028148e+05 | 1.478214e+04 | 2.927798e+05 | 3.185746e+05 | 2.092150e+04 | 3.153966e+05 | 3.334028e+00 | 5.932963e-02 | 0.107823 | ... | 0.100879 | 7.790997e+02 | 7.127443e+03 | 1.456729e+01 | 88916.115834 | 72444.869708 | 106857.034789 | 149647.415123 | 153303.516729 | 0.471134 |
| min | 1.000001e+06 | 1.000010e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -9.000000e-01 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -0.000015 | ... | 0.373150 | -2.922000e+03 | -1.000000e+00 | 0.000000e+00 | -2922.000000 | -2892.000000 | -2801.000000 | -2889.000000 | -2874.000000 | 0.000000 |
| 25% | 1.461857e+06 | 1.893290e+05 | 6.321780e+03 | 1.872000e+04 | 2.416050e+04 | 0.000000e+00 | 5.084100e+04 | 1.000000e+01 | 1.000000e+00 | 0.000000 | ... | 0.715645 | -1.300000e+03 | -1.000000e+00 | 6.000000e+00 | 365243.000000 | -1628.000000 | -1242.000000 | -1314.000000 | -1270.000000 | 0.000000 |
| 50% | 1.923110e+06 | 2.787145e+05 | 1.125000e+04 | 7.104600e+04 | 8.054100e+04 | 1.638000e+03 | 1.123200e+05 | 1.200000e+01 | 1.000000e+00 | 0.051605 | ... | 0.835095 | -5.810000e+02 | 3.000000e+00 | 1.200000e+01 | 365243.000000 | -831.000000 | -361.000000 | -537.000000 | -499.000000 | 0.000000 |
| 75% | 2.384280e+06 | 3.675140e+05 | 2.065842e+04 | 1.803600e+05 | 2.164185e+05 | 7.740000e+03 | 2.340000e+05 | 1.500000e+01 | 1.000000e+00 | 0.108909 | ... | 0.852537 | -2.800000e+02 | 8.200000e+01 | 2.400000e+01 | 365243.000000 | -411.000000 | 129.000000 | -74.000000 | -44.000000 | 1.000000 |
| max | 2.845382e+06 | 4.562550e+05 | 4.180581e+05 | 6.905160e+06 | 6.905160e+06 | 3.060045e+06 | 6.905160e+06 | 2.300000e+01 | 1.000000e+00 | 1.000000 | ... | 1.000000 | -1.000000e+00 | 4.000000e+06 | 8.400000e+01 | 365243.000000 | 365243.000000 | 365243.000000 | 365243.000000 | 365243.000000 | 1.000000 |
8 rows × 21 columns
prev_app.info()#To check data type and null values for all columns
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1670214 entries, 0 to 1670213 Data columns (total 37 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SK_ID_PREV 1670214 non-null int64 1 SK_ID_CURR 1670214 non-null int64 2 NAME_CONTRACT_TYPE 1670214 non-null object 3 AMT_ANNUITY 1297979 non-null float64 4 AMT_APPLICATION 1670214 non-null float64 5 AMT_CREDIT 1670213 non-null float64 6 AMT_DOWN_PAYMENT 774370 non-null float64 7 AMT_GOODS_PRICE 1284699 non-null float64 8 WEEKDAY_APPR_PROCESS_START 1670214 non-null object 9 HOUR_APPR_PROCESS_START 1670214 non-null int64 10 FLAG_LAST_APPL_PER_CONTRACT 1670214 non-null object 11 NFLAG_LAST_APPL_IN_DAY 1670214 non-null int64 12 RATE_DOWN_PAYMENT 774370 non-null float64 13 RATE_INTEREST_PRIMARY 5951 non-null float64 14 RATE_INTEREST_PRIVILEGED 5951 non-null float64 15 NAME_CASH_LOAN_PURPOSE 1670214 non-null object 16 NAME_CONTRACT_STATUS 1670214 non-null object 17 DAYS_DECISION 1670214 non-null int64 18 NAME_PAYMENT_TYPE 1670214 non-null object 19 CODE_REJECT_REASON 1670214 non-null object 20 NAME_TYPE_SUITE 849809 non-null object 21 NAME_CLIENT_TYPE 1670214 non-null object 22 NAME_GOODS_CATEGORY 1670214 non-null object 23 NAME_PORTFOLIO 1670214 non-null object 24 NAME_PRODUCT_TYPE 1670214 non-null object 25 CHANNEL_TYPE 1670214 non-null object 26 SELLERPLACE_AREA 1670214 non-null int64 27 NAME_SELLER_INDUSTRY 1670214 non-null object 28 CNT_PAYMENT 1297984 non-null float64 29 NAME_YIELD_GROUP 1670214 non-null object 30 PRODUCT_COMBINATION 1669868 non-null object 31 DAYS_FIRST_DRAWING 997149 non-null float64 32 DAYS_FIRST_DUE 997149 non-null float64 33 DAYS_LAST_DUE_1ST_VERSION 997149 non-null float64 34 DAYS_LAST_DUE 997149 non-null float64 35 DAYS_TERMINATION 997149 non-null float64 36 NFLAG_INSURED_ON_APPROVAL 997149 non-null float64 dtypes: float64(15), int64(6), object(16) memory usage: 471.5+ MB
cred_bal.head()
| SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | AMT_BALANCE | AMT_CREDIT_LIMIT_ACTUAL | AMT_DRAWINGS_ATM_CURRENT | AMT_DRAWINGS_CURRENT | AMT_DRAWINGS_OTHER_CURRENT | AMT_DRAWINGS_POS_CURRENT | AMT_INST_MIN_REGULARITY | ... | AMT_RECIVABLE | AMT_TOTAL_RECEIVABLE | CNT_DRAWINGS_ATM_CURRENT | CNT_DRAWINGS_CURRENT | CNT_DRAWINGS_OTHER_CURRENT | CNT_DRAWINGS_POS_CURRENT | CNT_INSTALMENT_MATURE_CUM | NAME_CONTRACT_STATUS | SK_DPD | SK_DPD_DEF | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2562384 | 378907 | -6 | 56.970 | 135000 | 0.0 | 877.5 | 0.0 | 877.5 | 1700.325 | ... | 0.000 | 0.000 | 0.0 | 1 | 0.0 | 1.0 | 35.0 | Active | 0 | 0 |
| 1 | 2582071 | 363914 | -1 | 63975.555 | 45000 | 2250.0 | 2250.0 | 0.0 | 0.0 | 2250.000 | ... | 64875.555 | 64875.555 | 1.0 | 1 | 0.0 | 0.0 | 69.0 | Active | 0 | 0 |
| 2 | 1740877 | 371185 | -7 | 31815.225 | 450000 | 0.0 | 0.0 | 0.0 | 0.0 | 2250.000 | ... | 31460.085 | 31460.085 | 0.0 | 0 | 0.0 | 0.0 | 30.0 | Active | 0 | 0 |
| 3 | 1389973 | 337855 | -4 | 236572.110 | 225000 | 2250.0 | 2250.0 | 0.0 | 0.0 | 11795.760 | ... | 233048.970 | 233048.970 | 1.0 | 1 | 0.0 | 0.0 | 10.0 | Active | 0 | 0 |
| 4 | 1891521 | 126868 | -1 | 453919.455 | 450000 | 0.0 | 11547.0 | 0.0 | 11547.0 | 22924.890 | ... | 453919.455 | 453919.455 | 0.0 | 1 | 0.0 | 1.0 | 101.0 | Active | 0 | 0 |
5 rows × 23 columns
cred_bal.columns.values
array(['SK_ID_PREV', 'SK_ID_CURR', 'MONTHS_BALANCE', 'AMT_BALANCE',
'AMT_CREDIT_LIMIT_ACTUAL', 'AMT_DRAWINGS_ATM_CURRENT',
'AMT_DRAWINGS_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT',
'AMT_DRAWINGS_POS_CURRENT', 'AMT_INST_MIN_REGULARITY',
'AMT_PAYMENT_CURRENT', 'AMT_PAYMENT_TOTAL_CURRENT',
'AMT_RECEIVABLE_PRINCIPAL', 'AMT_RECIVABLE',
'AMT_TOTAL_RECEIVABLE', 'CNT_DRAWINGS_ATM_CURRENT',
'CNT_DRAWINGS_CURRENT', 'CNT_DRAWINGS_OTHER_CURRENT',
'CNT_DRAWINGS_POS_CURRENT', 'CNT_INSTALMENT_MATURE_CUM',
'NAME_CONTRACT_STATUS', 'SK_DPD', 'SK_DPD_DEF'], dtype=object)
cred_bal.describe()#Used to view some basic statistical details like percentile,mean,std etc.
| SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | AMT_BALANCE | AMT_CREDIT_LIMIT_ACTUAL | AMT_DRAWINGS_ATM_CURRENT | AMT_DRAWINGS_CURRENT | AMT_DRAWINGS_OTHER_CURRENT | AMT_DRAWINGS_POS_CURRENT | AMT_INST_MIN_REGULARITY | ... | AMT_RECEIVABLE_PRINCIPAL | AMT_RECIVABLE | AMT_TOTAL_RECEIVABLE | CNT_DRAWINGS_ATM_CURRENT | CNT_DRAWINGS_CURRENT | CNT_DRAWINGS_OTHER_CURRENT | CNT_DRAWINGS_POS_CURRENT | CNT_INSTALMENT_MATURE_CUM | SK_DPD | SK_DPD_DEF | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3.840312e+06 | 3.840312e+06 | 3.840312e+06 | 3.840312e+06 | 3.840312e+06 | 3.090496e+06 | 3.840312e+06 | 3.090496e+06 | 3.090496e+06 | 3.535076e+06 | ... | 3.840312e+06 | 3.840312e+06 | 3.840312e+06 | 3.090496e+06 | 3.840312e+06 | 3.090496e+06 | 3.090496e+06 | 3.535076e+06 | 3.840312e+06 | 3.840312e+06 |
| mean | 1.904504e+06 | 2.783242e+05 | -3.452192e+01 | 5.830016e+04 | 1.538080e+05 | 5.961325e+03 | 7.433388e+03 | 2.881696e+02 | 2.968805e+03 | 3.540204e+03 | ... | 5.596588e+04 | 5.808881e+04 | 5.809829e+04 | 3.094490e-01 | 7.031439e-01 | 4.812496e-03 | 5.594791e-01 | 2.082508e+01 | 9.283667e+00 | 3.316220e-01 |
| std | 5.364695e+05 | 1.027045e+05 | 2.666775e+01 | 1.063070e+05 | 1.651457e+05 | 2.822569e+04 | 3.384608e+04 | 8.201989e+03 | 2.079689e+04 | 5.600154e+03 | ... | 1.025336e+05 | 1.059654e+05 | 1.059718e+05 | 1.100401e+00 | 3.190347e+00 | 8.263861e-02 | 3.240649e+00 | 2.005149e+01 | 9.751570e+01 | 2.147923e+01 |
| min | 1.000018e+06 | 1.000060e+05 | -9.600000e+01 | -4.202502e+05 | 0.000000e+00 | -6.827310e+03 | -6.211620e+03 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ... | -4.233058e+05 | -4.202502e+05 | -4.202502e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 25% | 1.434385e+06 | 1.895170e+05 | -5.500000e+01 | 0.000000e+00 | 4.500000e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ... | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 4.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 50% | 1.897122e+06 | 2.783960e+05 | -2.800000e+01 | 0.000000e+00 | 1.125000e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ... | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.500000e+01 | 0.000000e+00 | 0.000000e+00 |
| 75% | 2.369328e+06 | 3.675800e+05 | -1.100000e+01 | 8.904669e+04 | 1.800000e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 6.633911e+03 | ... | 8.535924e+04 | 8.889949e+04 | 8.891451e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 3.200000e+01 | 0.000000e+00 | 0.000000e+00 |
| max | 2.843496e+06 | 4.562500e+05 | -1.000000e+00 | 1.505902e+06 | 1.350000e+06 | 2.115000e+06 | 2.287098e+06 | 1.529847e+06 | 2.239274e+06 | 2.028820e+05 | ... | 1.472317e+06 | 1.493338e+06 | 1.493338e+06 | 5.100000e+01 | 1.650000e+02 | 1.200000e+01 | 1.650000e+02 | 1.200000e+02 | 3.260000e+03 | 3.260000e+03 |
8 rows × 22 columns
# List the dtype and non-null count of every column. pandas omits the
# non-null counts by default for frames with more rows than
# display.max_info_rows, so they are requested explicitly here.
cred_bal.info(show_counts=True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3840312 entries, 0 to 3840311 Data columns (total 23 columns): # Column Dtype --- ------ ----- 0 SK_ID_PREV int64 1 SK_ID_CURR int64 2 MONTHS_BALANCE int64 3 AMT_BALANCE float64 4 AMT_CREDIT_LIMIT_ACTUAL int64 5 AMT_DRAWINGS_ATM_CURRENT float64 6 AMT_DRAWINGS_CURRENT float64 7 AMT_DRAWINGS_OTHER_CURRENT float64 8 AMT_DRAWINGS_POS_CURRENT float64 9 AMT_INST_MIN_REGULARITY float64 10 AMT_PAYMENT_CURRENT float64 11 AMT_PAYMENT_TOTAL_CURRENT float64 12 AMT_RECEIVABLE_PRINCIPAL float64 13 AMT_RECIVABLE float64 14 AMT_TOTAL_RECEIVABLE float64 15 CNT_DRAWINGS_ATM_CURRENT float64 16 CNT_DRAWINGS_CURRENT int64 17 CNT_DRAWINGS_OTHER_CURRENT float64 18 CNT_DRAWINGS_POS_CURRENT float64 19 CNT_INSTALMENT_MATURE_CUM float64 20 NAME_CONTRACT_STATUS object 21 SK_DPD int64 22 SK_DPD_DEF int64 dtypes: float64(15), int64(7), object(1) memory usage: 673.9+ MB
bure.head()#Getting the first five rows
| SK_ID_CURR | SK_ID_BUREAU | CREDIT_ACTIVE | CREDIT_CURRENCY | DAYS_CREDIT | CREDIT_DAY_OVERDUE | DAYS_CREDIT_ENDDATE | DAYS_ENDDATE_FACT | AMT_CREDIT_MAX_OVERDUE | CNT_CREDIT_PROLONG | AMT_CREDIT_SUM | AMT_CREDIT_SUM_DEBT | AMT_CREDIT_SUM_LIMIT | AMT_CREDIT_SUM_OVERDUE | CREDIT_TYPE | DAYS_CREDIT_UPDATE | AMT_ANNUITY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 215354 | 5714462 | Closed | currency 1 | -497 | 0 | -153.0 | -153.0 | NaN | 0 | 91323.0 | 0.0 | NaN | 0.0 | Consumer credit | -131 | NaN |
| 1 | 215354 | 5714463 | Active | currency 1 | -208 | 0 | 1075.0 | NaN | NaN | 0 | 225000.0 | 171342.0 | NaN | 0.0 | Credit card | -20 | NaN |
| 2 | 215354 | 5714464 | Active | currency 1 | -203 | 0 | 528.0 | NaN | NaN | 0 | 464323.5 | NaN | NaN | 0.0 | Consumer credit | -16 | NaN |
| 3 | 215354 | 5714465 | Active | currency 1 | -203 | 0 | NaN | NaN | NaN | 0 | 90000.0 | NaN | NaN | 0.0 | Credit card | -16 | NaN |
| 4 | 215354 | 5714466 | Active | currency 1 | -629 | 0 | 1197.0 | NaN | 77674.5 | 0 | 2700000.0 | NaN | NaN | 0.0 | Consumer credit | -21 | NaN |
bure.columns.values
array(['SK_ID_CURR', 'SK_ID_BUREAU', 'CREDIT_ACTIVE', 'CREDIT_CURRENCY',
'DAYS_CREDIT', 'CREDIT_DAY_OVERDUE', 'DAYS_CREDIT_ENDDATE',
'DAYS_ENDDATE_FACT', 'AMT_CREDIT_MAX_OVERDUE',
'CNT_CREDIT_PROLONG', 'AMT_CREDIT_SUM', 'AMT_CREDIT_SUM_DEBT',
'AMT_CREDIT_SUM_LIMIT', 'AMT_CREDIT_SUM_OVERDUE', 'CREDIT_TYPE',
'DAYS_CREDIT_UPDATE', 'AMT_ANNUITY'], dtype=object)
bure.describe()#Used to view some basic statistical details like percentile,mean,std etc.
| SK_ID_CURR | SK_ID_BUREAU | DAYS_CREDIT | CREDIT_DAY_OVERDUE | DAYS_CREDIT_ENDDATE | DAYS_ENDDATE_FACT | AMT_CREDIT_MAX_OVERDUE | CNT_CREDIT_PROLONG | AMT_CREDIT_SUM | AMT_CREDIT_SUM_DEBT | AMT_CREDIT_SUM_LIMIT | AMT_CREDIT_SUM_OVERDUE | DAYS_CREDIT_UPDATE | AMT_ANNUITY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.716428e+06 | 1.716428e+06 | 1.716428e+06 | 1.716428e+06 | 1.610875e+06 | 1.082775e+06 | 5.919400e+05 | 1.716428e+06 | 1.716415e+06 | 1.458759e+06 | 1.124648e+06 | 1.716428e+06 | 1.716428e+06 | 4.896370e+05 |
| mean | 2.782149e+05 | 5.924434e+06 | -1.142108e+03 | 8.181666e-01 | 5.105174e+02 | -1.017437e+03 | 3.825418e+03 | 6.410406e-03 | 3.549946e+05 | 1.370851e+05 | 6.229515e+03 | 3.791276e+01 | -5.937483e+02 | 1.571276e+04 |
| std | 1.029386e+05 | 5.322657e+05 | 7.951649e+02 | 3.654443e+01 | 4.994220e+03 | 7.140106e+02 | 2.060316e+05 | 9.622391e-02 | 1.149811e+06 | 6.774011e+05 | 4.503203e+04 | 5.937650e+03 | 7.207473e+02 | 3.258269e+05 |
| min | 1.000010e+05 | 5.000000e+06 | -2.922000e+03 | 0.000000e+00 | -4.206000e+04 | -4.202300e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -4.705600e+06 | -5.864061e+05 | 0.000000e+00 | -4.194700e+04 | 0.000000e+00 |
| 25% | 1.888668e+05 | 5.463954e+06 | -1.666000e+03 | 0.000000e+00 | -1.138000e+03 | -1.489000e+03 | 0.000000e+00 | 0.000000e+00 | 5.130000e+04 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -9.080000e+02 | 0.000000e+00 |
| 50% | 2.780550e+05 | 5.926304e+06 | -9.870000e+02 | 0.000000e+00 | -3.300000e+02 | -8.970000e+02 | 0.000000e+00 | 0.000000e+00 | 1.255185e+05 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | -3.950000e+02 | 0.000000e+00 |
| 75% | 3.674260e+05 | 6.385681e+06 | -4.740000e+02 | 0.000000e+00 | 4.740000e+02 | -4.250000e+02 | 0.000000e+00 | 0.000000e+00 | 3.150000e+05 | 4.015350e+04 | 0.000000e+00 | 0.000000e+00 | -3.300000e+01 | 1.350000e+04 |
| max | 4.562550e+05 | 6.843457e+06 | 0.000000e+00 | 2.792000e+03 | 3.119900e+04 | 0.000000e+00 | 1.159872e+08 | 9.000000e+00 | 5.850000e+08 | 1.701000e+08 | 4.705600e+06 | 3.756681e+06 | 3.720000e+02 | 1.184534e+08 |
# List the dtype and non-null count of every column. pandas omits the
# non-null counts by default for frames with more rows than
# display.max_info_rows, so they are requested explicitly here.
bure.info(show_counts=True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1716428 entries, 0 to 1716427 Data columns (total 17 columns): # Column Dtype --- ------ ----- 0 SK_ID_CURR int64 1 SK_ID_BUREAU int64 2 CREDIT_ACTIVE object 3 CREDIT_CURRENCY object 4 DAYS_CREDIT int64 5 CREDIT_DAY_OVERDUE int64 6 DAYS_CREDIT_ENDDATE float64 7 DAYS_ENDDATE_FACT float64 8 AMT_CREDIT_MAX_OVERDUE float64 9 CNT_CREDIT_PROLONG int64 10 AMT_CREDIT_SUM float64 11 AMT_CREDIT_SUM_DEBT float64 12 AMT_CREDIT_SUM_LIMIT float64 13 AMT_CREDIT_SUM_OVERDUE float64 14 CREDIT_TYPE object 15 DAYS_CREDIT_UPDATE int64 16 AMT_ANNUITY float64 dtypes: float64(8), int64(6), object(3) memory usage: 222.6+ MB
# Missing-value summary for app_train: null count and null percentage per
# column, ordered from the most to the least incomplete column.
# The frame is scanned for nulls only once; the percentage is derived from the
# already-sorted counts, so both series share the same index order when they
# are concatenated side by side.
total = app_train.isnull().sum().sort_values(ascending=False)
percent = total / len(app_train) * 100
missing_app_train_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_app_train_data.head(20)
| Total | Percent | |
|---|---|---|
| COMMONAREA_MEDI | 214865 | 69.872297 |
| COMMONAREA_AVG | 214865 | 69.872297 |
| COMMONAREA_MODE | 214865 | 69.872297 |
| NONLIVINGAPARTMENTS_MODE | 213514 | 69.432963 |
| NONLIVINGAPARTMENTS_AVG | 213514 | 69.432963 |
| NONLIVINGAPARTMENTS_MEDI | 213514 | 69.432963 |
| FONDKAPREMONT_MODE | 210295 | 68.386172 |
| LIVINGAPARTMENTS_MODE | 210199 | 68.354953 |
| LIVINGAPARTMENTS_AVG | 210199 | 68.354953 |
| LIVINGAPARTMENTS_MEDI | 210199 | 68.354953 |
| FLOORSMIN_AVG | 208642 | 67.848630 |
| FLOORSMIN_MODE | 208642 | 67.848630 |
| FLOORSMIN_MEDI | 208642 | 67.848630 |
| YEARS_BUILD_MEDI | 204488 | 66.497784 |
| YEARS_BUILD_MODE | 204488 | 66.497784 |
| YEARS_BUILD_AVG | 204488 | 66.497784 |
| OWN_CAR_AGE | 202929 | 65.990810 |
| LANDAREA_MEDI | 182590 | 59.376738 |
| LANDAREA_MODE | 182590 | 59.376738 |
| LANDAREA_AVG | 182590 | 59.376738 |
# Missing-value summary for pos_bal: null count and null percentage per
# column, ordered from the most to the least incomplete column.
# The frame is scanned for nulls only once; the percentage is derived from the
# already-sorted counts, so both series share the same index order when they
# are concatenated side by side.
total = pos_bal.isnull().sum().sort_values(ascending=False)
percent = total / len(pos_bal) * 100
missing_pos_bal_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_pos_bal_data.head(3)
| Total | Percent | |
|---|---|---|
| CNT_INSTALMENT_FUTURE | 26087 | 0.260835 |
| CNT_INSTALMENT | 26071 | 0.260675 |
| SK_ID_PREV | 0 | 0.000000 |
# Missing-value summary for bure_bal: null count and null percentage per
# column, ordered from the most to the least incomplete column.
# The frame is scanned for nulls only once; the percentage is derived from the
# already-sorted counts, so both series share the same index order when they
# are concatenated side by side.
total = bure_bal.isnull().sum().sort_values(ascending=False)
percent = total / len(bure_bal) * 100
missing_bure_bal_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_bure_bal_data.head(3)
| Total | Percent | |
|---|---|---|
| SK_ID_BUREAU | 0 | 0.0 |
| MONTHS_BALANCE | 0 | 0.0 |
| STATUS | 0 | 0.0 |
# Missing-value summary for prev_app: null count and null percentage per
# column, ordered from the most to the least incomplete column.
# The frame is scanned for nulls only once; the percentage is derived from the
# already-sorted counts, so both series share the same index order when they
# are concatenated side by side.
total = prev_app.isnull().sum().sort_values(ascending=False)
percent = total / len(prev_app) * 100
missing_prev_app_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_prev_app_data.head(20)
| Total | Percent | |
|---|---|---|
| RATE_INTEREST_PRIVILEGED | 1664263 | 99.643698 |
| RATE_INTEREST_PRIMARY | 1664263 | 99.643698 |
| AMT_DOWN_PAYMENT | 895844 | 53.636480 |
| RATE_DOWN_PAYMENT | 895844 | 53.636480 |
| NAME_TYPE_SUITE | 820405 | 49.119754 |
| NFLAG_INSURED_ON_APPROVAL | 673065 | 40.298129 |
| DAYS_TERMINATION | 673065 | 40.298129 |
| DAYS_LAST_DUE | 673065 | 40.298129 |
| DAYS_LAST_DUE_1ST_VERSION | 673065 | 40.298129 |
| DAYS_FIRST_DUE | 673065 | 40.298129 |
| DAYS_FIRST_DRAWING | 673065 | 40.298129 |
| AMT_GOODS_PRICE | 385515 | 23.081773 |
| AMT_ANNUITY | 372235 | 22.286665 |
| CNT_PAYMENT | 372230 | 22.286366 |
| PRODUCT_COMBINATION | 346 | 0.020716 |
| AMT_CREDIT | 1 | 0.000060 |
| NAME_YIELD_GROUP | 0 | 0.000000 |
| NAME_PORTFOLIO | 0 | 0.000000 |
| NAME_SELLER_INDUSTRY | 0 | 0.000000 |
| SELLERPLACE_AREA | 0 | 0.000000 |
# Missing-value summary for installments_payments: NaN count and NaN share per column
inst_pay_null_counts = inst_pay.isnull().sum()
total = inst_pay_null_counts.sort_values(ascending=False)
# Every column holds len(inst_pay) entries, so this is the percentage of rows that are NaN
percent = (inst_pay_null_counts / len(inst_pay) * 100).sort_values(ascending=False)
missing_inst_pay_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_inst_pay_data.head(3)
| Total | Percent | |
|---|---|---|
| DAYS_ENTRY_PAYMENT | 2905 | 0.021352 |
| AMT_PAYMENT | 2905 | 0.021352 |
| SK_ID_PREV | 0 | 0.000000 |
# Missing-value summary for credit_card_balance: NaN count and NaN share per column
cred_bal_null_counts = cred_bal.isnull().sum()
total = cred_bal_null_counts.sort_values(ascending=False)
# Every column holds len(cred_bal) entries, so this is the percentage of rows that are NaN
percent = (cred_bal_null_counts / len(cred_bal) * 100).sort_values(ascending=False)
missing_cred_bal_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_cred_bal_data.head(10)
| Total | Percent | |
|---|---|---|
| AMT_PAYMENT_CURRENT | 767988 | 19.998063 |
| AMT_DRAWINGS_ATM_CURRENT | 749816 | 19.524872 |
| CNT_DRAWINGS_POS_CURRENT | 749816 | 19.524872 |
| AMT_DRAWINGS_OTHER_CURRENT | 749816 | 19.524872 |
| AMT_DRAWINGS_POS_CURRENT | 749816 | 19.524872 |
| CNT_DRAWINGS_OTHER_CURRENT | 749816 | 19.524872 |
| CNT_DRAWINGS_ATM_CURRENT | 749816 | 19.524872 |
| CNT_INSTALMENT_MATURE_CUM | 305236 | 7.948208 |
| AMT_INST_MIN_REGULARITY | 305236 | 7.948208 |
| SK_ID_PREV | 0 | 0.000000 |
# Missing-value summary for bureau: NaN count and NaN share per column
bure_null_counts = bure.isnull().sum()
total = bure_null_counts.sort_values(ascending=False)
# Every column holds len(bure) entries, so this is the percentage of rows that are NaN
percent = (bure_null_counts / len(bure) * 100).sort_values(ascending=False)
missing_bure_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_bure_data.head(8)
| Total | Percent | |
|---|---|---|
| AMT_ANNUITY | 1226791 | 71.473490 |
| AMT_CREDIT_MAX_OVERDUE | 1124488 | 65.513264 |
| DAYS_ENDDATE_FACT | 633653 | 36.916958 |
| AMT_CREDIT_SUM_LIMIT | 591780 | 34.477415 |
| AMT_CREDIT_SUM_DEBT | 257669 | 15.011932 |
| DAYS_CREDIT_ENDDATE | 105553 | 6.149573 |
| AMT_CREDIT_SUM | 13 | 0.000757 |
| CREDIT_ACTIVE | 0 | 0.000000 |
# Distribution of AMT_CREDIT
cf.go_offline()
credit_amounts = app_train['AMT_CREDIT']
credit_amounts.iplot(
    kind='histogram',
    bins=100,
    xTitle='Credit Amount',
    yTitle='Count of applicants',
    title='Distribution of AMT_CREDIT',
)
# Distribution of DAYS_EMPLOYED
cf.set_config_file(theme='pearl')
app_train['DAYS_EMPLOYED'].iplot(
    kind='histogram',
    bins=50,
    # The y-axis is labelled as a percentage, so normalise the bins to percent
    # of applicants instead of plotting raw counts under a "%" label.
    histnorm='percent',
    xTitle='Days',
    yTitle='Count of applicants in %',
    title='Days before the application the person started current employment',
)
# Distribution of AMT_INCOME_TOTAL, restricted to incomes below 2,000,000
income_below_cap = app_train['AMT_INCOME_TOTAL'] < 2000000
capped_income = app_train.loc[income_below_cap, 'AMT_INCOME_TOTAL']
capped_income.iplot(
    kind='histogram',
    bins=100,
    xTitle='Total Income',
    yTitle='Count of applicants',
    title='Distribution of AMT_INCOME_TOTAL',
)
# Who accompanied the client when applying, as a percentage of all applicants
cf.set_config_file(theme='polar')
suite_counts = app_train['NAME_TYPE_SUITE'].value_counts()
# Divide by the full row count, so rows with a missing companion still count in the denominator
suite_val = (suite_counts / len(app_train)) * 100
suite_val.iplot(
    kind='bar',
    xTitle='Name of type of the Suite',
    yTitle='Count of applicants in %',
    title='Who accompanied client when applying for the application in % ',
)
# Companion type split by TARGET value, each bar as a percentage of all
# applicants with a known companion type.
suite_val = app_train['NAME_TYPE_SUITE'].value_counts()
# value_counts() orders each subset by its own frequencies and omits categories
# that do not occur in the subset. Plotly pairs x and y positionally, so both
# per-TARGET series are reindexed to the overall category order (absent
# categories become 0) to keep every bar under its own label.
suite_val_y0 = (
    app_train.loc[app_train['TARGET'] == 0, 'NAME_TYPE_SUITE']
    .value_counts()
    .reindex(suite_val.index, fill_value=0)
)
suite_val_y1 = (
    app_train.loc[app_train['TARGET'] == 1, 'NAME_TYPE_SUITE']
    .value_counts()
    .reindex(suite_val.index, fill_value=0)
)
data = [
    go.Bar(x=suite_val.index, y=(suite_val_y1 / suite_val.sum()) * 100, name='Yes'),
    go.Bar(x=suite_val.index, y=(suite_val_y0 / suite_val.sum()) * 100, name='No'),
]
layout = go.Layout(
    title="Who accompanied the client when applying for the loan in terms of loan repayment status (%)",
    xaxis=dict(title='Name of the type of Suite'),
    yaxis=dict(title='Count of applicants (%)'),
)
fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_dark'
fig.show()
# Income source split by TARGET value, each bar as a percentage of all applicants
income_val = app_train['NAME_INCOME_TYPE'].value_counts()
income_val_y0, income_val_y1 = [], []
for val in income_val.index:
    # TARGET values of the applicants that have this income source
    target_for_val = app_train.loc[app_train['NAME_INCOME_TYPE'] == val, 'TARGET']
    income_val_y1.append(np.sum(target_for_val == 1))
    income_val_y0.append(np.sum(target_for_val == 0))

income_total = income_val.sum()
data = [
    go.Bar(x=income_val.index, y=(income_val_y1 / income_total) * 100, name='Yes'),
    go.Bar(x=income_val.index, y=(income_val_y0 / income_total) * 100, name='No'),
]
layout = go.Layout(
    title="Income sources of Applicants in terms of loan is repayed or not in %",
    xaxis={'title': 'Income source'},
    yaxis={'title': 'Count of applicants in %'},
)
fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_dark'
py.iplot(fig)
# Education level split by TARGET value, each bar as a percentage of all applicants
education_val = app_train['NAME_EDUCATION_TYPE'].value_counts()
education_val_y0, education_val_y1 = [], []
for val in education_val.index:
    # TARGET values of the applicants that have this education level
    target_for_val = app_train.loc[app_train['NAME_EDUCATION_TYPE'] == val, 'TARGET']
    education_val_y1.append(np.sum(target_for_val == 1))
    education_val_y0.append(np.sum(target_for_val == 0))

education_total = education_val.sum()
data = [
    go.Bar(x=education_val.index, y=(education_val_y1 / education_total) * 100, name='Yes'),
    go.Bar(x=education_val.index, y=(education_val_y0 / education_total) * 100, name='No'),
]
layout = go.Layout(
    title="Education sources of Applicants in terms of loan is repayed or not in %",
    xaxis={'title': 'Education of Applicants'},
    yaxis={'title': 'Count of applicants in %'},
)
fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_dark'
py.iplot(fig)
# Family status split by TARGET value, each bar as a percentage of all applicants.
# Uses its own family_* names so the education counts computed earlier are not
# overwritten with family-status data.
family_val = app_train['NAME_FAMILY_STATUS'].value_counts()
family_val_y0 = []
family_val_y1 = []
for val in family_val.index:
    # TARGET values of the applicants that have this family status
    target_for_val = app_train.loc[app_train['NAME_FAMILY_STATUS'] == val, 'TARGET']
    family_val_y1.append(np.sum(target_for_val == 1))
    family_val_y0.append(np.sum(target_for_val == 0))

family_total = family_val.sum()
data = [
    go.Bar(x=family_val.index, y=(family_val_y1 / family_total) * 100, name='Yes'),
    go.Bar(x=family_val.index, y=(family_val_y0 / family_total) * 100, name='No'),
]
layout = go.Layout(
    title="Family status of Applicants in terms of loan is repayed or not.%",
    xaxis=dict(
        # Was 'Education of Applicants', copied over from the education chart
        title='Family status of Applicants',
    ),
    yaxis=dict(
        title='Count of applicants in %',
    ),
)
fig = go.Figure(data=data, layout=layout)
fig.layout.template = 'plotly_dark'
py.iplot(fig)
# Pie chart of how many applications fall under each TARGET value
temp = app_train["TARGET"].value_counts()
target_slices = {'labels': temp.index, 'values': temp.values}
df = pd.DataFrame(target_slices)
df.iplot(
    kind='pie',
    labels='labels',
    values='values',
    title='Loan Repayed or not',
)
# Donut chart of the contract types among current applications
cf.set_config_file(theme='polar')
contract_val = app_train['NAME_CONTRACT_TYPE'].value_counts()
contract_slices = {'labels': contract_val.index, 'values': contract_val.values}
contract_df = pd.DataFrame(contract_slices)
contract_df.iplot(
    kind='pie',
    labels='labels',
    values='values',
    title='Types of Loan',
    hole=0.6,
)
# Donut charts for four categorical applicant attributes, drawn in this order.
# Each entry pairs the application_train column with the chart title.
applicant_pie_specs = [
    ("NAME_FAMILY_STATUS", "Family Status of Applicant's"),
    ("NAME_INCOME_TYPE", "Income sources of Applicant's"),
    ("NAME_EDUCATION_TYPE", "Education of Applicant's"),
    ("NAME_HOUSING_TYPE", "Type of House"),
]
for pie_column, pie_title in applicant_pie_specs:
    temp = app_train[pie_column].value_counts()
    df = pd.DataFrame({'labels': temp.index, 'values': temp.values})
    df.iplot(
        kind='pie',
        labels='labels',
        values='values',
        title=pie_title,
        hole=0.5,
    )
# Bar chart: number of applicants per occupation
temp = app_train["OCCUPATION_TYPE"].value_counts()
temp.iplot(
    kind='bar',
    xTitle='Occupation',
    yTitle="Count",
    title="Occupation of Applicant's who applied for loan",
    color='green',
)
# Bar chart: number of applicants per employer organization type
temp = app_train["ORGANIZATION_TYPE"].value_counts()
temp.iplot(
    kind='bar',
    xTitle='Organization Name',
    yTitle="Count",
    title='Types of Organizations who applied for loan ',
    color='red',
)
# Donut chart: contract status of the previous applications
temp = prev_app["NAME_CONTRACT_STATUS"].value_counts()
status_slices = {'labels': temp.index, 'values': temp.values}
df = pd.DataFrame(status_slices)
df.iplot(
    kind='pie',
    labels='labels',
    values='values',
    title='Contract approved or not',
    hole=0.5,
)
# Donut chart: payment method chosen on the previous applications
temp = prev_app["NAME_PAYMENT_TYPE"].value_counts()
payment_slices = {'labels': temp.index, 'values': temp.values}
df = pd.DataFrame(payment_slices)
df.iplot(
    kind='pie',
    labels='labels',
    values='values',
    title='Payment method that client choose to pay for the previous application',
    hole=0.7,
    colors=['#75e575', '#ea7c96'],
)
Data Preparation: Feature Engineering
# Ratio features derived from the raw application columns
income = app_train['AMT_INCOME_TOTAL']
credit = app_train['AMT_CREDIT']
annuity = app_train['AMT_ANNUITY']

# True where the total income exceeds the credit amount
app_train['INCOME_GT_CREDIT_FLAG'] = income.gt(credit)
# Credit amount relative to total income
app_train['CREDIT_INCOME_PERCENT'] = credit.div(income)
# Annuity relative to total income
app_train['ANNUITY_INCOME_PERCENT'] = annuity.div(income)
# Credit amount divided by the annuity
app_train['CREDIT_TERM'] = credit.div(annuity)
# Days employed as a fraction of DAYS_BIRTH
app_train['DAYS_EMPLOYED_PERCENT'] = app_train['DAYS_EMPLOYED'].div(app_train['DAYS_BIRTH'])

# Shape of Application data
print('The shape of application data :', app_train.shape)
The shape of application data : (307511, 127)
Joining Bureau data to Application data:
# Aggregate bureau records to one row per SK_ID_CURR and left-join them onto
# the application data. Applicants without bureau records get 0 in every
# BUREAU_* column.

# Combining numerical features
# numeric_only=True: bureau still contains object columns here, and pandas >= 2.0
# raises a TypeError instead of silently skipping them when averaging.
grp = (
    bure.drop(['SK_ID_BUREAU'], axis=1)
    .groupby(by=['SK_ID_CURR'])
    .mean(numeric_only=True)
    .reset_index()
)
grp.columns = ['BUREAU_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
app_bure = app_train.merge(grp, on='SK_ID_CURR', how='left')
app_bure.update(app_bure[grp.columns].fillna(0))

# Combining categorical features
# One-hot encode the object columns; the per-applicant mean of each dummy is the
# share of that applicant's bureau records in the category.
bure_cat = pd.get_dummies(bure.select_dtypes('object'))
bure_cat['SK_ID_CURR'] = bure['SK_ID_CURR']
grp = bure_cat.groupby(by=['SK_ID_CURR']).mean().reset_index()
grp.columns = ['BUREAU_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
app_bure = app_bure.merge(grp, on='SK_ID_CURR', how='left')
app_bure.update(app_bure[grp.columns].fillna(0))

# Shape of application and bureau data combined
print('The shape application and bureau data combined:', app_bure.shape)
The shape application and bureau data combined: (307511, 162)
Joining Previous Application data to Application Bureau data:
# Aggregate previous applications to one row per SK_ID_CURR and left-join them
# onto the application + bureau data. Applicants without previous applications
# get 0 in every PREV_* column.

# Number of previous applications per customer
grp = (
    prev_app[['SK_ID_CURR', 'SK_ID_PREV']]
    .groupby(by=['SK_ID_CURR'])['SK_ID_PREV']
    .count()
    .reset_index()
    .rename(columns={'SK_ID_PREV': 'PREV_APP_COUNT'})
)
app_bure_prev = app_bure.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev['PREV_APP_COUNT'] = app_bure_prev['PREV_APP_COUNT'].fillna(0)

# Combining numerical features
# numeric_only=True: prev_app still contains object columns here, and pandas >= 2.0
# raises a TypeError instead of silently skipping them when averaging.
grp = (
    prev_app.drop('SK_ID_PREV', axis=1)
    .groupby(by=['SK_ID_CURR'])
    .mean(numeric_only=True)
    .reset_index()
)
prev_columns = ['PREV_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
grp.columns = prev_columns
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))

# Combining categorical features
# One-hot encode the object columns; the per-applicant mean of each dummy is the
# share of that applicant's previous applications in the category.
prev_cat = pd.get_dummies(prev_app.select_dtypes('object'))
prev_cat['SK_ID_CURR'] = prev_app['SK_ID_CURR']
grp = prev_cat.groupby('SK_ID_CURR').mean().reset_index()
grp.columns = ['PREV_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))
Joining POS_CASH_balance data to application_bureau_prev_data:
# Aggregate POS_CASH_balance to one row per SK_ID_CURR and left-join it onto the
# running feature table. Applicants without POS records get 0 in every POS_* column.

# Combining numerical features
# numeric_only=True: pos_bal still contains an object column here, and pandas >= 2.0
# raises a TypeError instead of silently skipping it when averaging.
grp = (
    pos_bal.drop('SK_ID_PREV', axis=1)
    .groupby(by=['SK_ID_CURR'])
    .mean(numeric_only=True)
    .reset_index()
)
prev_columns = ['POS_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
grp.columns = prev_columns
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))

# Combining categorical features
# One-hot encode the object columns; the per-applicant mean of each dummy is the
# share of that applicant's POS records in the category.
pos_bal_cat = pd.get_dummies(pos_bal.select_dtypes('object'))
pos_bal_cat['SK_ID_CURR'] = pos_bal['SK_ID_CURR']
grp = pos_bal_cat.groupby('SK_ID_CURR').mean().reset_index()
grp.columns = ['POS_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))
Joining Installments Payments data to application_bureau_prev_data:
# Installments payments has numerical features only: average them per applicant,
# prefix the columns with INSTA_, and left-join onto the running feature table.
inst_features = inst_pay.drop(columns='SK_ID_PREV')
grp = inst_features.groupby(by=['SK_ID_CURR']).mean().reset_index()
prev_columns = [
    column if column == 'SK_ID_CURR' else 'INSTA_' + column
    for column in grp.columns
]
grp.columns = prev_columns
app_bure_prev = app_bure_prev.merge(grp, how='left', on=['SK_ID_CURR'])
# Applicants without installment records get 0 in the INSTA_* columns
insta_filled = app_bure_prev[grp.columns].fillna(0)
app_bure_prev.update(insta_filled)
Joining Credit card balance data to application_bureau_prev data:
# Aggregate credit_card_balance to one row per SK_ID_CURR and left-join it onto
# the running feature table. Applicants without credit card records get 0 in
# every CREDIT_* column.

# Combining numerical features
# numeric_only=True: cred_bal still contains an object column here, and pandas >= 2.0
# raises a TypeError instead of silently skipping it when averaging.
grp = (
    cred_bal.drop('SK_ID_PREV', axis=1)
    .groupby(by=['SK_ID_CURR'])
    .mean(numeric_only=True)
    .reset_index()
)
prev_columns = ['CREDIT_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
grp.columns = prev_columns
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))

# Combining categorical features
# One-hot encode the object columns; the per-applicant mean of each dummy is the
# share of that applicant's credit card records in the category.
cred_cat = pd.get_dummies(cred_bal.select_dtypes('object'))
cred_cat['SK_ID_CURR'] = cred_bal['SK_ID_CURR']
grp = cred_cat.groupby('SK_ID_CURR').mean().reset_index()
grp.columns = ['CREDIT_' + column if column != 'SK_ID_CURR' else column for column in grp.columns]
app_bure_prev = app_bure_prev.merge(grp, on=['SK_ID_CURR'], how='left')
app_bure_prev.update(app_bure_prev[grp.columns].fillna(0))
app_bure_prev
| SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 307506 | 456251 | 0 | Cash loans | M | N | N | 0 | 157500.0 | 254700.0 | 27558.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307507 | 456252 | 0 | Cash loans | F | N | Y | 0 | 72000.0 | 269550.0 | 12001.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307508 | 456253 | 0 | Cash loans | F | N | Y | 0 | 153000.0 | 677664.0 | 29979.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307509 | 456254 | 1 | Cash loans | F | N | Y | 0 | 171000.0 | 370107.0 | 20205.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307510 | 456255 | 0 | Cash loans | F | N | N | 0 | 157500.0 | 675000.0 | 49117.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
307511 rows × 372 columns
# Print a summary of the merged feature table: index range, column count,
# dtype breakdown and memory usage
app_bure_prev.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 307511 entries, 0 to 307510 Columns: 372 entries, SK_ID_CURR to CREDIT_NAME_CONTRACT_STATUS_Signed dtypes: bool(1), float64(314), int64(41), object(16) memory usage: 873.1+ MB
# Print the number of missing values for every column of the merged table
null_counts_by_column = app_bure_prev.isnull().sum()
for col, n_missing in null_counts_by_column.items():
    print(col, n_missing)
SK_ID_CURR 0 TARGET 0 NAME_CONTRACT_TYPE 0 CODE_GENDER 0 FLAG_OWN_CAR 0 FLAG_OWN_REALTY 0 CNT_CHILDREN 0 AMT_INCOME_TOTAL 0 AMT_CREDIT 0 AMT_ANNUITY 12 AMT_GOODS_PRICE 278 NAME_TYPE_SUITE 1292 NAME_INCOME_TYPE 0 NAME_EDUCATION_TYPE 0 NAME_FAMILY_STATUS 0 NAME_HOUSING_TYPE 0 REGION_POPULATION_RELATIVE 0 DAYS_BIRTH 0 DAYS_EMPLOYED 0 DAYS_REGISTRATION 0 DAYS_ID_PUBLISH 0 OWN_CAR_AGE 202929 FLAG_MOBIL 0 FLAG_EMP_PHONE 0 FLAG_WORK_PHONE 0 FLAG_CONT_MOBILE 0 FLAG_PHONE 0 FLAG_EMAIL 0 OCCUPATION_TYPE 96391 CNT_FAM_MEMBERS 2 REGION_RATING_CLIENT 0 REGION_RATING_CLIENT_W_CITY 0 WEEKDAY_APPR_PROCESS_START 0 HOUR_APPR_PROCESS_START 0 REG_REGION_NOT_LIVE_REGION 0 REG_REGION_NOT_WORK_REGION 0 LIVE_REGION_NOT_WORK_REGION 0 REG_CITY_NOT_LIVE_CITY 0 REG_CITY_NOT_WORK_CITY 0 LIVE_CITY_NOT_WORK_CITY 0 ORGANIZATION_TYPE 0 EXT_SOURCE_1 173378 EXT_SOURCE_2 660 EXT_SOURCE_3 60965 APARTMENTS_AVG 156061 BASEMENTAREA_AVG 179943 YEARS_BEGINEXPLUATATION_AVG 150007 YEARS_BUILD_AVG 204488 COMMONAREA_AVG 214865 ELEVATORS_AVG 163891 ENTRANCES_AVG 154828 FLOORSMAX_AVG 153020 FLOORSMIN_AVG 208642 LANDAREA_AVG 182590 LIVINGAPARTMENTS_AVG 210199 LIVINGAREA_AVG 154350 NONLIVINGAPARTMENTS_AVG 213514 NONLIVINGAREA_AVG 169682 APARTMENTS_MODE 156061 BASEMENTAREA_MODE 179943 YEARS_BEGINEXPLUATATION_MODE 150007 YEARS_BUILD_MODE 204488 COMMONAREA_MODE 214865 ELEVATORS_MODE 163891 ENTRANCES_MODE 154828 FLOORSMAX_MODE 153020 FLOORSMIN_MODE 208642 LANDAREA_MODE 182590 LIVINGAPARTMENTS_MODE 210199 LIVINGAREA_MODE 154350 NONLIVINGAPARTMENTS_MODE 213514 NONLIVINGAREA_MODE 169682 APARTMENTS_MEDI 156061 BASEMENTAREA_MEDI 179943 YEARS_BEGINEXPLUATATION_MEDI 150007 YEARS_BUILD_MEDI 204488 COMMONAREA_MEDI 214865 ELEVATORS_MEDI 163891 ENTRANCES_MEDI 154828 FLOORSMAX_MEDI 153020 FLOORSMIN_MEDI 208642 LANDAREA_MEDI 182590 LIVINGAPARTMENTS_MEDI 210199 LIVINGAREA_MEDI 154350 NONLIVINGAPARTMENTS_MEDI 213514 NONLIVINGAREA_MEDI 169682 FONDKAPREMONT_MODE 210295 HOUSETYPE_MODE 154297 TOTALAREA_MODE 148431 WALLSMATERIAL_MODE 
156341 EMERGENCYSTATE_MODE 145755 OBS_30_CNT_SOCIAL_CIRCLE 1021 DEF_30_CNT_SOCIAL_CIRCLE 1021 OBS_60_CNT_SOCIAL_CIRCLE 1021 DEF_60_CNT_SOCIAL_CIRCLE 1021 DAYS_LAST_PHONE_CHANGE 1 FLAG_DOCUMENT_2 0 FLAG_DOCUMENT_3 0 FLAG_DOCUMENT_4 0 FLAG_DOCUMENT_5 0 FLAG_DOCUMENT_6 0 FLAG_DOCUMENT_7 0 FLAG_DOCUMENT_8 0 FLAG_DOCUMENT_9 0 FLAG_DOCUMENT_10 0 FLAG_DOCUMENT_11 0 FLAG_DOCUMENT_12 0 FLAG_DOCUMENT_13 0 FLAG_DOCUMENT_14 0 FLAG_DOCUMENT_15 0 FLAG_DOCUMENT_16 0 FLAG_DOCUMENT_17 0 FLAG_DOCUMENT_18 0 FLAG_DOCUMENT_19 0 FLAG_DOCUMENT_20 0 FLAG_DOCUMENT_21 0 AMT_REQ_CREDIT_BUREAU_HOUR 41519 AMT_REQ_CREDIT_BUREAU_DAY 41519 AMT_REQ_CREDIT_BUREAU_WEEK 41519 AMT_REQ_CREDIT_BUREAU_MON 41519 AMT_REQ_CREDIT_BUREAU_QRT 41519 AMT_REQ_CREDIT_BUREAU_YEAR 41519 INCOME_GT_CREDIT_FLAG 0 CREDIT_INCOME_PERCENT 0 ANNUITY_INCOME_PERCENT 12 CREDIT_TERM 12 DAYS_EMPLOYED_PERCENT 0 BUREAU_DAYS_CREDIT 0 BUREAU_CREDIT_DAY_OVERDUE 0 BUREAU_DAYS_CREDIT_ENDDATE 0 BUREAU_DAYS_ENDDATE_FACT 0 BUREAU_AMT_CREDIT_MAX_OVERDUE 0 BUREAU_CNT_CREDIT_PROLONG 0 BUREAU_AMT_CREDIT_SUM 0 BUREAU_AMT_CREDIT_SUM_DEBT 0 BUREAU_AMT_CREDIT_SUM_LIMIT 0 BUREAU_AMT_CREDIT_SUM_OVERDUE 0 BUREAU_DAYS_CREDIT_UPDATE 0 BUREAU_AMT_ANNUITY 0 BUREAU_CREDIT_ACTIVE_Active 0 BUREAU_CREDIT_ACTIVE_Bad debt 0 BUREAU_CREDIT_ACTIVE_Closed 0 BUREAU_CREDIT_ACTIVE_Sold 0 BUREAU_CREDIT_CURRENCY_currency 1 0 BUREAU_CREDIT_CURRENCY_currency 2 0 BUREAU_CREDIT_CURRENCY_currency 3 0 BUREAU_CREDIT_CURRENCY_currency 4 0 BUREAU_CREDIT_TYPE_Another type of loan 0 BUREAU_CREDIT_TYPE_Car loan 0 BUREAU_CREDIT_TYPE_Cash loan (non-earmarked) 0 BUREAU_CREDIT_TYPE_Consumer credit 0 BUREAU_CREDIT_TYPE_Credit card 0 BUREAU_CREDIT_TYPE_Interbank credit 0 BUREAU_CREDIT_TYPE_Loan for business development 0 BUREAU_CREDIT_TYPE_Loan for purchase of shares (margin lending) 0 BUREAU_CREDIT_TYPE_Loan for the purchase of equipment 0 BUREAU_CREDIT_TYPE_Loan for working capital replenishment 0 BUREAU_CREDIT_TYPE_Microloan 0 BUREAU_CREDIT_TYPE_Mobile operator loan 0 
BUREAU_CREDIT_TYPE_Mortgage 0 BUREAU_CREDIT_TYPE_Real estate loan 0 BUREAU_CREDIT_TYPE_Unknown type of loan 0 PREV_APP_COUNT 0 PREV_AMT_ANNUITY 0 PREV_AMT_APPLICATION 0 PREV_AMT_CREDIT 0 PREV_AMT_DOWN_PAYMENT 0 PREV_AMT_GOODS_PRICE 0 PREV_HOUR_APPR_PROCESS_START 0 PREV_NFLAG_LAST_APPL_IN_DAY 0 PREV_RATE_DOWN_PAYMENT 0 PREV_RATE_INTEREST_PRIMARY 0 PREV_RATE_INTEREST_PRIVILEGED 0 PREV_DAYS_DECISION 0 PREV_SELLERPLACE_AREA 0 PREV_CNT_PAYMENT 0 PREV_DAYS_FIRST_DRAWING 0 PREV_DAYS_FIRST_DUE 0 PREV_DAYS_LAST_DUE_1ST_VERSION 0 PREV_DAYS_LAST_DUE 0 PREV_DAYS_TERMINATION 0 PREV_NFLAG_INSURED_ON_APPROVAL 0 PREV_NAME_CONTRACT_TYPE_Cash loans 0 PREV_NAME_CONTRACT_TYPE_Consumer loans 0 PREV_NAME_CONTRACT_TYPE_Revolving loans 0 PREV_NAME_CONTRACT_TYPE_XNA 0 PREV_WEEKDAY_APPR_PROCESS_START_FRIDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_MONDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SATURDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SUNDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_THURSDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_TUESDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_WEDNESDAY 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_N 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_Y 0 PREV_NAME_CASH_LOAN_PURPOSE_Building a house or an annex 0 PREV_NAME_CASH_LOAN_PURPOSE_Business development 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a garage 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a home 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a new car 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a used car 0 PREV_NAME_CASH_LOAN_PURPOSE_Car repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Education 0 PREV_NAME_CASH_LOAN_PURPOSE_Everyday expenses 0 PREV_NAME_CASH_LOAN_PURPOSE_Furniture 0 PREV_NAME_CASH_LOAN_PURPOSE_Gasification / water supply 0 PREV_NAME_CASH_LOAN_PURPOSE_Hobby 0 PREV_NAME_CASH_LOAN_PURPOSE_Journey 0 PREV_NAME_CASH_LOAN_PURPOSE_Medicine 0 PREV_NAME_CASH_LOAN_PURPOSE_Money for a third person 0 PREV_NAME_CASH_LOAN_PURPOSE_Other 0 PREV_NAME_CASH_LOAN_PURPOSE_Payments on other loans 0 PREV_NAME_CASH_LOAN_PURPOSE_Purchase 
of electronic equipment 0 PREV_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal 0 PREV_NAME_CASH_LOAN_PURPOSE_Repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Urgent needs 0 PREV_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday 0 PREV_NAME_CASH_LOAN_PURPOSE_XAP 0 PREV_NAME_CASH_LOAN_PURPOSE_XNA 0 PREV_NAME_CONTRACT_STATUS_Approved 0 PREV_NAME_CONTRACT_STATUS_Canceled 0 PREV_NAME_CONTRACT_STATUS_Refused 0 PREV_NAME_CONTRACT_STATUS_Unused offer 0 PREV_NAME_PAYMENT_TYPE_Cash through the bank 0 PREV_NAME_PAYMENT_TYPE_Cashless from the account of the employer 0 PREV_NAME_PAYMENT_TYPE_Non-cash from your account 0 PREV_NAME_PAYMENT_TYPE_XNA 0 PREV_CODE_REJECT_REASON_CLIENT 0 PREV_CODE_REJECT_REASON_HC 0 PREV_CODE_REJECT_REASON_LIMIT 0 PREV_CODE_REJECT_REASON_SCO 0 PREV_CODE_REJECT_REASON_SCOFR 0 PREV_CODE_REJECT_REASON_SYSTEM 0 PREV_CODE_REJECT_REASON_VERIF 0 PREV_CODE_REJECT_REASON_XAP 0 PREV_CODE_REJECT_REASON_XNA 0 PREV_NAME_TYPE_SUITE_Children 0 PREV_NAME_TYPE_SUITE_Family 0 PREV_NAME_TYPE_SUITE_Group of people 0 PREV_NAME_TYPE_SUITE_Other_A 0 PREV_NAME_TYPE_SUITE_Other_B 0 PREV_NAME_TYPE_SUITE_Spouse, partner 0 PREV_NAME_TYPE_SUITE_Unaccompanied 0 PREV_NAME_CLIENT_TYPE_New 0 PREV_NAME_CLIENT_TYPE_Refreshed 0 PREV_NAME_CLIENT_TYPE_Repeater 0 PREV_NAME_CLIENT_TYPE_XNA 0 PREV_NAME_GOODS_CATEGORY_Additional Service 0 PREV_NAME_GOODS_CATEGORY_Animals 0 PREV_NAME_GOODS_CATEGORY_Audio/Video 0 PREV_NAME_GOODS_CATEGORY_Auto Accessories 0 PREV_NAME_GOODS_CATEGORY_Clothing and Accessories 0 PREV_NAME_GOODS_CATEGORY_Computers 0 PREV_NAME_GOODS_CATEGORY_Construction Materials 0 PREV_NAME_GOODS_CATEGORY_Consumer Electronics 0 PREV_NAME_GOODS_CATEGORY_Direct Sales 0 PREV_NAME_GOODS_CATEGORY_Education 0 PREV_NAME_GOODS_CATEGORY_Fitness 0 PREV_NAME_GOODS_CATEGORY_Furniture 0 PREV_NAME_GOODS_CATEGORY_Gardening 0 PREV_NAME_GOODS_CATEGORY_Homewares 0 PREV_NAME_GOODS_CATEGORY_House Construction 0 PREV_NAME_GOODS_CATEGORY_Insurance 0 PREV_NAME_GOODS_CATEGORY_Jewelry 0 PREV_NAME_GOODS_CATEGORY_Medical 
Supplies 0 PREV_NAME_GOODS_CATEGORY_Medicine 0 PREV_NAME_GOODS_CATEGORY_Mobile 0 PREV_NAME_GOODS_CATEGORY_Office Appliances 0 PREV_NAME_GOODS_CATEGORY_Other 0 PREV_NAME_GOODS_CATEGORY_Photo / Cinema Equipment 0 PREV_NAME_GOODS_CATEGORY_Sport and Leisure 0 PREV_NAME_GOODS_CATEGORY_Tourism 0 PREV_NAME_GOODS_CATEGORY_Vehicles 0 PREV_NAME_GOODS_CATEGORY_Weapon 0 PREV_NAME_GOODS_CATEGORY_XNA 0 PREV_NAME_PORTFOLIO_Cards 0 PREV_NAME_PORTFOLIO_Cars 0 PREV_NAME_PORTFOLIO_Cash 0 PREV_NAME_PORTFOLIO_POS 0 PREV_NAME_PORTFOLIO_XNA 0 PREV_NAME_PRODUCT_TYPE_XNA 0 PREV_NAME_PRODUCT_TYPE_walk-in 0 PREV_NAME_PRODUCT_TYPE_x-sell 0 PREV_CHANNEL_TYPE_AP+ (Cash loan) 0 PREV_CHANNEL_TYPE_Car dealer 0 PREV_CHANNEL_TYPE_Channel of corporate sales 0 PREV_CHANNEL_TYPE_Contact center 0 PREV_CHANNEL_TYPE_Country-wide 0 PREV_CHANNEL_TYPE_Credit and cash offices 0 PREV_CHANNEL_TYPE_Regional / Local 0 PREV_CHANNEL_TYPE_Stone 0 PREV_NAME_SELLER_INDUSTRY_Auto technology 0 PREV_NAME_SELLER_INDUSTRY_Clothing 0 PREV_NAME_SELLER_INDUSTRY_Connectivity 0 PREV_NAME_SELLER_INDUSTRY_Construction 0 PREV_NAME_SELLER_INDUSTRY_Consumer electronics 0 PREV_NAME_SELLER_INDUSTRY_Furniture 0 PREV_NAME_SELLER_INDUSTRY_Industry 0 PREV_NAME_SELLER_INDUSTRY_Jewelry 0 PREV_NAME_SELLER_INDUSTRY_MLM partners 0 PREV_NAME_SELLER_INDUSTRY_Tourism 0 PREV_NAME_SELLER_INDUSTRY_XNA 0 PREV_NAME_YIELD_GROUP_XNA 0 PREV_NAME_YIELD_GROUP_high 0 PREV_NAME_YIELD_GROUP_low_action 0 PREV_NAME_YIELD_GROUP_low_normal 0 PREV_NAME_YIELD_GROUP_middle 0 PREV_PRODUCT_COMBINATION_Card Street 0 PREV_PRODUCT_COMBINATION_Card X-Sell 0 PREV_PRODUCT_COMBINATION_Cash 0 PREV_PRODUCT_COMBINATION_Cash Street: high 0 PREV_PRODUCT_COMBINATION_Cash Street: low 0 PREV_PRODUCT_COMBINATION_Cash Street: middle 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: high 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: low 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: middle 0 PREV_PRODUCT_COMBINATION_POS household with interest 0 PREV_PRODUCT_COMBINATION_POS household without interest 0 
PREV_PRODUCT_COMBINATION_POS industry with interest 0 PREV_PRODUCT_COMBINATION_POS industry without interest 0 PREV_PRODUCT_COMBINATION_POS mobile with interest 0 PREV_PRODUCT_COMBINATION_POS mobile without interest 0 PREV_PRODUCT_COMBINATION_POS other with interest 0 PREV_PRODUCT_COMBINATION_POS others without interest 0 POS_MONTHS_BALANCE 0 POS_CNT_INSTALMENT 0 POS_CNT_INSTALMENT_FUTURE 0 POS_SK_DPD 0 POS_SK_DPD_DEF 0 POS_NAME_CONTRACT_STATUS_Active 0 POS_NAME_CONTRACT_STATUS_Amortized debt 0 POS_NAME_CONTRACT_STATUS_Approved 0 POS_NAME_CONTRACT_STATUS_Canceled 0 POS_NAME_CONTRACT_STATUS_Completed 0 POS_NAME_CONTRACT_STATUS_Demand 0 POS_NAME_CONTRACT_STATUS_Returned to the store 0 POS_NAME_CONTRACT_STATUS_Signed 0 POS_NAME_CONTRACT_STATUS_XNA 0 INSTA_NUM_INSTALMENT_VERSION 0 INSTA_NUM_INSTALMENT_NUMBER 0 INSTA_DAYS_INSTALMENT 0 INSTA_DAYS_ENTRY_PAYMENT 0 INSTA_AMT_INSTALMENT 0 INSTA_AMT_PAYMENT 0 CREDIT_MONTHS_BALANCE 0 CREDIT_AMT_BALANCE 0 CREDIT_AMT_CREDIT_LIMIT_ACTUAL 0 CREDIT_AMT_DRAWINGS_ATM_CURRENT 0 CREDIT_AMT_DRAWINGS_CURRENT 0 CREDIT_AMT_DRAWINGS_OTHER_CURRENT 0 CREDIT_AMT_DRAWINGS_POS_CURRENT 0 CREDIT_AMT_INST_MIN_REGULARITY 0 CREDIT_AMT_PAYMENT_CURRENT 0 CREDIT_AMT_PAYMENT_TOTAL_CURRENT 0 CREDIT_AMT_RECEIVABLE_PRINCIPAL 0 CREDIT_AMT_RECIVABLE 0 CREDIT_AMT_TOTAL_RECEIVABLE 0 CREDIT_CNT_DRAWINGS_ATM_CURRENT 0 CREDIT_CNT_DRAWINGS_CURRENT 0 CREDIT_CNT_DRAWINGS_OTHER_CURRENT 0 CREDIT_CNT_DRAWINGS_POS_CURRENT 0 CREDIT_CNT_INSTALMENT_MATURE_CUM 0 CREDIT_SK_DPD 0 CREDIT_SK_DPD_DEF 0 CREDIT_NAME_CONTRACT_STATUS_Active 0 CREDIT_NAME_CONTRACT_STATUS_Approved 0 CREDIT_NAME_CONTRACT_STATUS_Completed 0 CREDIT_NAME_CONTRACT_STATUS_Demand 0 CREDIT_NAME_CONTRACT_STATUS_Refused 0 CREDIT_NAME_CONTRACT_STATUS_Sent proposal 0 CREDIT_NAME_CONTRACT_STATUS_Signed 0
AMT_ANNUITY 12 ANNUITY_INCOME_PERCENT 12 CREDIT_TERM 12 AMT_GOODS_PRICE 278 NAME_TYPE_SUITE 1292 OBS_30_CNT_SOCIAL_CIRCLE 1021 DEF_30_CNT_SOCIAL_CIRCLE 1021 OBS_60_CNT_SOCIAL_CIRCLE 1021 DEF_60_CNT_SOCIAL_CIRCLE 1021 DAYS_LAST_PHONE_CHANGE 1 EXT_SOURCE_2 660 CNT_FAM_MEMBERS 2
# Columns with only a small number of missing values: drop the affected rows in place
sparse_null_columns = [
    'AMT_ANNUITY',
    'ANNUITY_INCOME_PERCENT',
    'CREDIT_TERM',
    'AMT_GOODS_PRICE',
    'NAME_TYPE_SUITE',
    'OBS_30_CNT_SOCIAL_CIRCLE',
    'DEF_30_CNT_SOCIAL_CIRCLE',
    'OBS_60_CNT_SOCIAL_CIRCLE',
    'DEF_60_CNT_SOCIAL_CIRCLE',
    'DAYS_LAST_PHONE_CHANGE',
    'EXT_SOURCE_2',
    'CNT_FAM_MEMBERS',
]
app_bure_prev.dropna(subset=sparse_null_columns, inplace=True)
# Re-check the number of missing values per column after the rows were dropped
remaining_null_counts = app_bure_prev.isnull().sum()
for col, n_missing in remaining_null_counts.items():
    print(col, n_missing)
SK_ID_CURR 0 TARGET 0 NAME_CONTRACT_TYPE 0 CODE_GENDER 0 FLAG_OWN_CAR 0 FLAG_OWN_REALTY 0 CNT_CHILDREN 0 AMT_INCOME_TOTAL 0 AMT_CREDIT 0 AMT_ANNUITY 0 AMT_GOODS_PRICE 0 NAME_TYPE_SUITE 0 NAME_INCOME_TYPE 0 NAME_EDUCATION_TYPE 0 NAME_FAMILY_STATUS 0 NAME_HOUSING_TYPE 0 REGION_POPULATION_RELATIVE 0 DAYS_BIRTH 0 DAYS_EMPLOYED 0 DAYS_REGISTRATION 0 DAYS_ID_PUBLISH 0 OWN_CAR_AGE 200912 FLAG_MOBIL 0 FLAG_EMP_PHONE 0 FLAG_WORK_PHONE 0 FLAG_CONT_MOBILE 0 FLAG_PHONE 0 FLAG_EMAIL 0 OCCUPATION_TYPE 95435 CNT_FAM_MEMBERS 0 REGION_RATING_CLIENT 0 REGION_RATING_CLIENT_W_CITY 0 WEEKDAY_APPR_PROCESS_START 0 HOUR_APPR_PROCESS_START 0 REG_REGION_NOT_LIVE_REGION 0 REG_REGION_NOT_WORK_REGION 0 LIVE_REGION_NOT_WORK_REGION 0 REG_CITY_NOT_LIVE_CITY 0 REG_CITY_NOT_WORK_CITY 0 LIVE_CITY_NOT_WORK_CITY 0 ORGANIZATION_TYPE 0 EXT_SOURCE_1 171652 EXT_SOURCE_2 0 EXT_SOURCE_3 60251 APARTMENTS_AVG 154677 BASEMENTAREA_AVG 178332 YEARS_BEGINEXPLUATATION_AVG 148686 YEARS_BUILD_AVG 202604 COMMONAREA_AVG 212870 ELEVATORS_AVG 162459 ENTRANCES_AVG 153468 FLOORSMAX_AVG 151677 FLOORSMIN_AVG 206726 LANDAREA_AVG 180937 LIVINGAPARTMENTS_AVG 208259 LIVINGAREA_AVG 152985 NONLIVINGAPARTMENTS_AVG 211544 NONLIVINGAREA_AVG 168187 APARTMENTS_MODE 154677 BASEMENTAREA_MODE 178332 YEARS_BEGINEXPLUATATION_MODE 148686 YEARS_BUILD_MODE 202604 COMMONAREA_MODE 212870 ELEVATORS_MODE 162459 ENTRANCES_MODE 153468 FLOORSMAX_MODE 151677 FLOORSMIN_MODE 206726 LANDAREA_MODE 180937 LIVINGAPARTMENTS_MODE 208259 LIVINGAREA_MODE 152985 NONLIVINGAPARTMENTS_MODE 211544 NONLIVINGAREA_MODE 168187 APARTMENTS_MEDI 154677 BASEMENTAREA_MEDI 178332 YEARS_BEGINEXPLUATATION_MEDI 148686 YEARS_BUILD_MEDI 202604 COMMONAREA_MEDI 212870 ELEVATORS_MEDI 162459 ENTRANCES_MEDI 153468 FLOORSMAX_MEDI 151677 FLOORSMIN_MEDI 206726 LANDAREA_MEDI 180937 LIVINGAPARTMENTS_MEDI 208259 LIVINGAREA_MEDI 152985 NONLIVINGAPARTMENTS_MEDI 211544 NONLIVINGAREA_MEDI 168187 FONDKAPREMONT_MODE 208352 HOUSETYPE_MODE 152937 TOTALAREA_MODE 147132 WALLSMATERIAL_MODE 154978 
EMERGENCYSTATE_MODE 144475 OBS_30_CNT_SOCIAL_CIRCLE 0 DEF_30_CNT_SOCIAL_CIRCLE 0 OBS_60_CNT_SOCIAL_CIRCLE 0 DEF_60_CNT_SOCIAL_CIRCLE 0 DAYS_LAST_PHONE_CHANGE 0 FLAG_DOCUMENT_2 0 FLAG_DOCUMENT_3 0 FLAG_DOCUMENT_4 0 FLAG_DOCUMENT_5 0 FLAG_DOCUMENT_6 0 FLAG_DOCUMENT_7 0 FLAG_DOCUMENT_8 0 FLAG_DOCUMENT_9 0 FLAG_DOCUMENT_10 0 FLAG_DOCUMENT_11 0 FLAG_DOCUMENT_12 0 FLAG_DOCUMENT_13 0 FLAG_DOCUMENT_14 0 FLAG_DOCUMENT_15 0 FLAG_DOCUMENT_16 0 FLAG_DOCUMENT_17 0 FLAG_DOCUMENT_18 0 FLAG_DOCUMENT_19 0 FLAG_DOCUMENT_20 0 FLAG_DOCUMENT_21 0 AMT_REQ_CREDIT_BUREAU_HOUR 41108 AMT_REQ_CREDIT_BUREAU_DAY 41108 AMT_REQ_CREDIT_BUREAU_WEEK 41108 AMT_REQ_CREDIT_BUREAU_MON 41108 AMT_REQ_CREDIT_BUREAU_QRT 41108 AMT_REQ_CREDIT_BUREAU_YEAR 41108 INCOME_GT_CREDIT_FLAG 0 CREDIT_INCOME_PERCENT 0 ANNUITY_INCOME_PERCENT 0 CREDIT_TERM 0 DAYS_EMPLOYED_PERCENT 0 BUREAU_DAYS_CREDIT 0 BUREAU_CREDIT_DAY_OVERDUE 0 BUREAU_DAYS_CREDIT_ENDDATE 0 BUREAU_DAYS_ENDDATE_FACT 0 BUREAU_AMT_CREDIT_MAX_OVERDUE 0 BUREAU_CNT_CREDIT_PROLONG 0 BUREAU_AMT_CREDIT_SUM 0 BUREAU_AMT_CREDIT_SUM_DEBT 0 BUREAU_AMT_CREDIT_SUM_LIMIT 0 BUREAU_AMT_CREDIT_SUM_OVERDUE 0 BUREAU_DAYS_CREDIT_UPDATE 0 BUREAU_AMT_ANNUITY 0 BUREAU_CREDIT_ACTIVE_Active 0 BUREAU_CREDIT_ACTIVE_Bad debt 0 BUREAU_CREDIT_ACTIVE_Closed 0 BUREAU_CREDIT_ACTIVE_Sold 0 BUREAU_CREDIT_CURRENCY_currency 1 0 BUREAU_CREDIT_CURRENCY_currency 2 0 BUREAU_CREDIT_CURRENCY_currency 3 0 BUREAU_CREDIT_CURRENCY_currency 4 0 BUREAU_CREDIT_TYPE_Another type of loan 0 BUREAU_CREDIT_TYPE_Car loan 0 BUREAU_CREDIT_TYPE_Cash loan (non-earmarked) 0 BUREAU_CREDIT_TYPE_Consumer credit 0 BUREAU_CREDIT_TYPE_Credit card 0 BUREAU_CREDIT_TYPE_Interbank credit 0 BUREAU_CREDIT_TYPE_Loan for business development 0 BUREAU_CREDIT_TYPE_Loan for purchase of shares (margin lending) 0 BUREAU_CREDIT_TYPE_Loan for the purchase of equipment 0 BUREAU_CREDIT_TYPE_Loan for working capital replenishment 0 BUREAU_CREDIT_TYPE_Microloan 0 BUREAU_CREDIT_TYPE_Mobile operator loan 0 BUREAU_CREDIT_TYPE_Mortgage 0 
BUREAU_CREDIT_TYPE_Real estate loan 0 BUREAU_CREDIT_TYPE_Unknown type of loan 0 PREV_APP_COUNT 0 PREV_AMT_ANNUITY 0 PREV_AMT_APPLICATION 0 PREV_AMT_CREDIT 0 PREV_AMT_DOWN_PAYMENT 0 PREV_AMT_GOODS_PRICE 0 PREV_HOUR_APPR_PROCESS_START 0 PREV_NFLAG_LAST_APPL_IN_DAY 0 PREV_RATE_DOWN_PAYMENT 0 PREV_RATE_INTEREST_PRIMARY 0 PREV_RATE_INTEREST_PRIVILEGED 0 PREV_DAYS_DECISION 0 PREV_SELLERPLACE_AREA 0 PREV_CNT_PAYMENT 0 PREV_DAYS_FIRST_DRAWING 0 PREV_DAYS_FIRST_DUE 0 PREV_DAYS_LAST_DUE_1ST_VERSION 0 PREV_DAYS_LAST_DUE 0 PREV_DAYS_TERMINATION 0 PREV_NFLAG_INSURED_ON_APPROVAL 0 PREV_NAME_CONTRACT_TYPE_Cash loans 0 PREV_NAME_CONTRACT_TYPE_Consumer loans 0 PREV_NAME_CONTRACT_TYPE_Revolving loans 0 PREV_NAME_CONTRACT_TYPE_XNA 0 PREV_WEEKDAY_APPR_PROCESS_START_FRIDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_MONDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SATURDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SUNDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_THURSDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_TUESDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_WEDNESDAY 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_N 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_Y 0 PREV_NAME_CASH_LOAN_PURPOSE_Building a house or an annex 0 PREV_NAME_CASH_LOAN_PURPOSE_Business development 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a garage 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a home 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a new car 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a used car 0 PREV_NAME_CASH_LOAN_PURPOSE_Car repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Education 0 PREV_NAME_CASH_LOAN_PURPOSE_Everyday expenses 0 PREV_NAME_CASH_LOAN_PURPOSE_Furniture 0 PREV_NAME_CASH_LOAN_PURPOSE_Gasification / water supply 0 PREV_NAME_CASH_LOAN_PURPOSE_Hobby 0 PREV_NAME_CASH_LOAN_PURPOSE_Journey 0 PREV_NAME_CASH_LOAN_PURPOSE_Medicine 0 PREV_NAME_CASH_LOAN_PURPOSE_Money for a third person 0 PREV_NAME_CASH_LOAN_PURPOSE_Other 0 PREV_NAME_CASH_LOAN_PURPOSE_Payments on other loans 0 PREV_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment 0 
PREV_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal 0 PREV_NAME_CASH_LOAN_PURPOSE_Repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Urgent needs 0 PREV_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday 0 PREV_NAME_CASH_LOAN_PURPOSE_XAP 0 PREV_NAME_CASH_LOAN_PURPOSE_XNA 0 PREV_NAME_CONTRACT_STATUS_Approved 0 PREV_NAME_CONTRACT_STATUS_Canceled 0 PREV_NAME_CONTRACT_STATUS_Refused 0 PREV_NAME_CONTRACT_STATUS_Unused offer 0 PREV_NAME_PAYMENT_TYPE_Cash through the bank 0 PREV_NAME_PAYMENT_TYPE_Cashless from the account of the employer 0 PREV_NAME_PAYMENT_TYPE_Non-cash from your account 0 PREV_NAME_PAYMENT_TYPE_XNA 0 PREV_CODE_REJECT_REASON_CLIENT 0 PREV_CODE_REJECT_REASON_HC 0 PREV_CODE_REJECT_REASON_LIMIT 0 PREV_CODE_REJECT_REASON_SCO 0 PREV_CODE_REJECT_REASON_SCOFR 0 PREV_CODE_REJECT_REASON_SYSTEM 0 PREV_CODE_REJECT_REASON_VERIF 0 PREV_CODE_REJECT_REASON_XAP 0 PREV_CODE_REJECT_REASON_XNA 0 PREV_NAME_TYPE_SUITE_Children 0 PREV_NAME_TYPE_SUITE_Family 0 PREV_NAME_TYPE_SUITE_Group of people 0 PREV_NAME_TYPE_SUITE_Other_A 0 PREV_NAME_TYPE_SUITE_Other_B 0 PREV_NAME_TYPE_SUITE_Spouse, partner 0 PREV_NAME_TYPE_SUITE_Unaccompanied 0 PREV_NAME_CLIENT_TYPE_New 0 PREV_NAME_CLIENT_TYPE_Refreshed 0 PREV_NAME_CLIENT_TYPE_Repeater 0 PREV_NAME_CLIENT_TYPE_XNA 0 PREV_NAME_GOODS_CATEGORY_Additional Service 0 PREV_NAME_GOODS_CATEGORY_Animals 0 PREV_NAME_GOODS_CATEGORY_Audio/Video 0 PREV_NAME_GOODS_CATEGORY_Auto Accessories 0 PREV_NAME_GOODS_CATEGORY_Clothing and Accessories 0 PREV_NAME_GOODS_CATEGORY_Computers 0 PREV_NAME_GOODS_CATEGORY_Construction Materials 0 PREV_NAME_GOODS_CATEGORY_Consumer Electronics 0 PREV_NAME_GOODS_CATEGORY_Direct Sales 0 PREV_NAME_GOODS_CATEGORY_Education 0 PREV_NAME_GOODS_CATEGORY_Fitness 0 PREV_NAME_GOODS_CATEGORY_Furniture 0 PREV_NAME_GOODS_CATEGORY_Gardening 0 PREV_NAME_GOODS_CATEGORY_Homewares 0 PREV_NAME_GOODS_CATEGORY_House Construction 0 PREV_NAME_GOODS_CATEGORY_Insurance 0 PREV_NAME_GOODS_CATEGORY_Jewelry 0 PREV_NAME_GOODS_CATEGORY_Medical Supplies 0 
PREV_NAME_GOODS_CATEGORY_Medicine 0 PREV_NAME_GOODS_CATEGORY_Mobile 0 PREV_NAME_GOODS_CATEGORY_Office Appliances 0 PREV_NAME_GOODS_CATEGORY_Other 0 PREV_NAME_GOODS_CATEGORY_Photo / Cinema Equipment 0 PREV_NAME_GOODS_CATEGORY_Sport and Leisure 0 PREV_NAME_GOODS_CATEGORY_Tourism 0 PREV_NAME_GOODS_CATEGORY_Vehicles 0 PREV_NAME_GOODS_CATEGORY_Weapon 0 PREV_NAME_GOODS_CATEGORY_XNA 0 PREV_NAME_PORTFOLIO_Cards 0 PREV_NAME_PORTFOLIO_Cars 0 PREV_NAME_PORTFOLIO_Cash 0 PREV_NAME_PORTFOLIO_POS 0 PREV_NAME_PORTFOLIO_XNA 0 PREV_NAME_PRODUCT_TYPE_XNA 0 PREV_NAME_PRODUCT_TYPE_walk-in 0 PREV_NAME_PRODUCT_TYPE_x-sell 0 PREV_CHANNEL_TYPE_AP+ (Cash loan) 0 PREV_CHANNEL_TYPE_Car dealer 0 PREV_CHANNEL_TYPE_Channel of corporate sales 0 PREV_CHANNEL_TYPE_Contact center 0 PREV_CHANNEL_TYPE_Country-wide 0 PREV_CHANNEL_TYPE_Credit and cash offices 0 PREV_CHANNEL_TYPE_Regional / Local 0 PREV_CHANNEL_TYPE_Stone 0 PREV_NAME_SELLER_INDUSTRY_Auto technology 0 PREV_NAME_SELLER_INDUSTRY_Clothing 0 PREV_NAME_SELLER_INDUSTRY_Connectivity 0 PREV_NAME_SELLER_INDUSTRY_Construction 0 PREV_NAME_SELLER_INDUSTRY_Consumer electronics 0 PREV_NAME_SELLER_INDUSTRY_Furniture 0 PREV_NAME_SELLER_INDUSTRY_Industry 0 PREV_NAME_SELLER_INDUSTRY_Jewelry 0 PREV_NAME_SELLER_INDUSTRY_MLM partners 0 PREV_NAME_SELLER_INDUSTRY_Tourism 0 PREV_NAME_SELLER_INDUSTRY_XNA 0 PREV_NAME_YIELD_GROUP_XNA 0 PREV_NAME_YIELD_GROUP_high 0 PREV_NAME_YIELD_GROUP_low_action 0 PREV_NAME_YIELD_GROUP_low_normal 0 PREV_NAME_YIELD_GROUP_middle 0 PREV_PRODUCT_COMBINATION_Card Street 0 PREV_PRODUCT_COMBINATION_Card X-Sell 0 PREV_PRODUCT_COMBINATION_Cash 0 PREV_PRODUCT_COMBINATION_Cash Street: high 0 PREV_PRODUCT_COMBINATION_Cash Street: low 0 PREV_PRODUCT_COMBINATION_Cash Street: middle 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: high 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: low 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: middle 0 PREV_PRODUCT_COMBINATION_POS household with interest 0 PREV_PRODUCT_COMBINATION_POS household without interest 0 
PREV_PRODUCT_COMBINATION_POS industry with interest 0 PREV_PRODUCT_COMBINATION_POS industry without interest 0 PREV_PRODUCT_COMBINATION_POS mobile with interest 0 PREV_PRODUCT_COMBINATION_POS mobile without interest 0 PREV_PRODUCT_COMBINATION_POS other with interest 0 PREV_PRODUCT_COMBINATION_POS others without interest 0 POS_MONTHS_BALANCE 0 POS_CNT_INSTALMENT 0 POS_CNT_INSTALMENT_FUTURE 0 POS_SK_DPD 0 POS_SK_DPD_DEF 0 POS_NAME_CONTRACT_STATUS_Active 0 POS_NAME_CONTRACT_STATUS_Amortized debt 0 POS_NAME_CONTRACT_STATUS_Approved 0 POS_NAME_CONTRACT_STATUS_Canceled 0 POS_NAME_CONTRACT_STATUS_Completed 0 POS_NAME_CONTRACT_STATUS_Demand 0 POS_NAME_CONTRACT_STATUS_Returned to the store 0 POS_NAME_CONTRACT_STATUS_Signed 0 POS_NAME_CONTRACT_STATUS_XNA 0 INSTA_NUM_INSTALMENT_VERSION 0 INSTA_NUM_INSTALMENT_NUMBER 0 INSTA_DAYS_INSTALMENT 0 INSTA_DAYS_ENTRY_PAYMENT 0 INSTA_AMT_INSTALMENT 0 INSTA_AMT_PAYMENT 0 CREDIT_MONTHS_BALANCE 0 CREDIT_AMT_BALANCE 0 CREDIT_AMT_CREDIT_LIMIT_ACTUAL 0 CREDIT_AMT_DRAWINGS_ATM_CURRENT 0 CREDIT_AMT_DRAWINGS_CURRENT 0 CREDIT_AMT_DRAWINGS_OTHER_CURRENT 0 CREDIT_AMT_DRAWINGS_POS_CURRENT 0 CREDIT_AMT_INST_MIN_REGULARITY 0 CREDIT_AMT_PAYMENT_CURRENT 0 CREDIT_AMT_PAYMENT_TOTAL_CURRENT 0 CREDIT_AMT_RECEIVABLE_PRINCIPAL 0 CREDIT_AMT_RECIVABLE 0 CREDIT_AMT_TOTAL_RECEIVABLE 0 CREDIT_CNT_DRAWINGS_ATM_CURRENT 0 CREDIT_CNT_DRAWINGS_CURRENT 0 CREDIT_CNT_DRAWINGS_OTHER_CURRENT 0 CREDIT_CNT_DRAWINGS_POS_CURRENT 0 CREDIT_CNT_INSTALMENT_MATURE_CUM 0 CREDIT_SK_DPD 0 CREDIT_SK_DPD_DEF 0 CREDIT_NAME_CONTRACT_STATUS_Active 0 CREDIT_NAME_CONTRACT_STATUS_Approved 0 CREDIT_NAME_CONTRACT_STATUS_Completed 0 CREDIT_NAME_CONTRACT_STATUS_Demand 0 CREDIT_NAME_CONTRACT_STATUS_Refused 0 CREDIT_NAME_CONTRACT_STATUS_Sent proposal 0 CREDIT_NAME_CONTRACT_STATUS_Signed 0
'EXT_SOURCE_1', 'APARTMENTS_AVG', 'BASEMENTAREA_AVG', 'YEARS_BUILD_AVG', 'COMMONAREA_AVG', 'ELEVATORS_AVG', 'ENTRANCES_AVG', 'FLOORSMAX_AVG', 'FLOORSMIN_AVG', 'LANDAREA_AVG', 'LIVINGAPARTMENTS_AVG', 'LIVINGAREA_AVG', 'NONLIVINGAPARTMENTS_AVG', 'NONLIVINGAREA_AVG', 'APARTMENTS_MODE', 'BASEMENTAREA_MODE', 'YEARS_BUILD_MODE', 'COMMONAREA_MODE', 'ELEVATORS_MODE', 'ENTRANCES_MODE', 'FLOORSMAX_MODE', 'FLOORSMIN_MODE', 'LANDAREA_MODE', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_MODE', 'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MODE', 'APARTMENTS_MEDI', 'BASEMENTAREA_MEDI', 'YEARS_BUILD_MEDI', 'COMMONAREA_MEDI', 'ELEVATORS_MEDI', 'ENTRANCES_MEDI', 'FLOORSMAX_MEDI', 'FLOORSMIN_MEDI', 'LANDAREA_MEDI', 'LIVINGAPARTMENTS_MEDI', 'LIVINGAREA_MEDI', 'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAREA_MEDI', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE'
# Isolate the contiguous run of columns from OWN_CAR_AGE through
# WALLSMATERIAL_MODE of the merged table so they can be inspected together.
# Both slice bounds are looked up on app_bure_prev itself: a position taken
# from a different frame is only valid while the two column orders agree.
start_h = app_bure_prev.columns.get_loc("OWN_CAR_AGE")
end_h = app_bure_prev.columns.get_loc("WALLSMATERIAL_MODE")
# .copy() makes df_house an independent frame, so the column assignments below
# never write into (or warn about) a slice of app_bure_prev.
df_house = app_bure_prev.iloc[:, start_h:end_h + 1].copy()
df_house["TARGET"] = app_bure_prev["TARGET"]
df_house["NAME_HOUSING_TYPE"] = app_bure_prev["NAME_HOUSING_TYPE"]
# house_df is the same column range again, i.e. df_house without the two
# helper columns appended above.
start_hou = df_house.columns.get_loc("OWN_CAR_AGE")
end_hou = df_house.columns.get_loc("WALLSMATERIAL_MODE")
house_df = df_house.iloc[:, start_hou:end_hou + 1]
# plt.figure(figsize=(25,20))
# sns.heatmap(house_df.corr(),annot=True)
# Remove EXT_SOURCE_1 and the listed housing columns from the merged table.
# Thirteen measurements each appear in three variants (_AVG, _MODE, _MEDI);
# the names are assembled from the stems instead of being spelled out one by one.
_housing_stems = (
    "APARTMENTS",
    "BASEMENTAREA",
    "YEARS_BUILD",
    "COMMONAREA",
    "ELEVATORS",
    "ENTRANCES",
    "FLOORSMAX",
    "FLOORSMIN",
    "LANDAREA",
    "LIVINGAPARTMENTS",
    "LIVINGAREA",
    "NONLIVINGAPARTMENTS",
    "NONLIVINGAREA",
)
_columns_to_drop = ["EXT_SOURCE_1"]
for _suffix in ("AVG", "MODE", "MEDI"):
    _columns_to_drop.extend(_stem + "_" + _suffix for _stem in _housing_stems)
_columns_to_drop.extend(["FONDKAPREMONT_MODE", "HOUSETYPE_MODE", "WALLSMATERIAL_MODE"])
# Dropped in place so every existing reference to app_bure_prev sees the change.
app_bure_prev.drop(columns=_columns_to_drop, inplace=True)
# Print the number of missing values in every column of app_bure_prev, one
# "name count" pair per line. All counts come from a single isnull().sum()
# pass rather than re-indexing the frame once per column.
for col, n_missing in app_bure_prev.isnull().sum().items():
    print(col, n_missing)
SK_ID_CURR 0 TARGET 0 NAME_CONTRACT_TYPE 0 CODE_GENDER 0 FLAG_OWN_CAR 0 FLAG_OWN_REALTY 0 CNT_CHILDREN 0 AMT_INCOME_TOTAL 0 AMT_CREDIT 0 AMT_ANNUITY 0 AMT_GOODS_PRICE 0 NAME_TYPE_SUITE 0 NAME_INCOME_TYPE 0 NAME_EDUCATION_TYPE 0 NAME_FAMILY_STATUS 0 NAME_HOUSING_TYPE 0 REGION_POPULATION_RELATIVE 0 DAYS_BIRTH 0 DAYS_EMPLOYED 0 DAYS_REGISTRATION 0 DAYS_ID_PUBLISH 0 OWN_CAR_AGE 200912 FLAG_MOBIL 0 FLAG_EMP_PHONE 0 FLAG_WORK_PHONE 0 FLAG_CONT_MOBILE 0 FLAG_PHONE 0 FLAG_EMAIL 0 OCCUPATION_TYPE 95435 CNT_FAM_MEMBERS 0 REGION_RATING_CLIENT 0 REGION_RATING_CLIENT_W_CITY 0 WEEKDAY_APPR_PROCESS_START 0 HOUR_APPR_PROCESS_START 0 REG_REGION_NOT_LIVE_REGION 0 REG_REGION_NOT_WORK_REGION 0 LIVE_REGION_NOT_WORK_REGION 0 REG_CITY_NOT_LIVE_CITY 0 REG_CITY_NOT_WORK_CITY 0 LIVE_CITY_NOT_WORK_CITY 0 ORGANIZATION_TYPE 0 EXT_SOURCE_2 0 EXT_SOURCE_3 60251 YEARS_BEGINEXPLUATATION_AVG 148686 YEARS_BEGINEXPLUATATION_MODE 148686 YEARS_BEGINEXPLUATATION_MEDI 148686 TOTALAREA_MODE 147132 EMERGENCYSTATE_MODE 144475 OBS_30_CNT_SOCIAL_CIRCLE 0 DEF_30_CNT_SOCIAL_CIRCLE 0 OBS_60_CNT_SOCIAL_CIRCLE 0 DEF_60_CNT_SOCIAL_CIRCLE 0 DAYS_LAST_PHONE_CHANGE 0 FLAG_DOCUMENT_2 0 FLAG_DOCUMENT_3 0 FLAG_DOCUMENT_4 0 FLAG_DOCUMENT_5 0 FLAG_DOCUMENT_6 0 FLAG_DOCUMENT_7 0 FLAG_DOCUMENT_8 0 FLAG_DOCUMENT_9 0 FLAG_DOCUMENT_10 0 FLAG_DOCUMENT_11 0 FLAG_DOCUMENT_12 0 FLAG_DOCUMENT_13 0 FLAG_DOCUMENT_14 0 FLAG_DOCUMENT_15 0 FLAG_DOCUMENT_16 0 FLAG_DOCUMENT_17 0 FLAG_DOCUMENT_18 0 FLAG_DOCUMENT_19 0 FLAG_DOCUMENT_20 0 FLAG_DOCUMENT_21 0 AMT_REQ_CREDIT_BUREAU_HOUR 41108 AMT_REQ_CREDIT_BUREAU_DAY 41108 AMT_REQ_CREDIT_BUREAU_WEEK 41108 AMT_REQ_CREDIT_BUREAU_MON 41108 AMT_REQ_CREDIT_BUREAU_QRT 41108 AMT_REQ_CREDIT_BUREAU_YEAR 41108 INCOME_GT_CREDIT_FLAG 0 CREDIT_INCOME_PERCENT 0 ANNUITY_INCOME_PERCENT 0 CREDIT_TERM 0 DAYS_EMPLOYED_PERCENT 0 BUREAU_DAYS_CREDIT 0 BUREAU_CREDIT_DAY_OVERDUE 0 BUREAU_DAYS_CREDIT_ENDDATE 0 BUREAU_DAYS_ENDDATE_FACT 0 BUREAU_AMT_CREDIT_MAX_OVERDUE 0 BUREAU_CNT_CREDIT_PROLONG 0 BUREAU_AMT_CREDIT_SUM 0 
BUREAU_AMT_CREDIT_SUM_DEBT 0 BUREAU_AMT_CREDIT_SUM_LIMIT 0 BUREAU_AMT_CREDIT_SUM_OVERDUE 0 BUREAU_DAYS_CREDIT_UPDATE 0 BUREAU_AMT_ANNUITY 0 BUREAU_CREDIT_ACTIVE_Active 0 BUREAU_CREDIT_ACTIVE_Bad debt 0 BUREAU_CREDIT_ACTIVE_Closed 0 BUREAU_CREDIT_ACTIVE_Sold 0 BUREAU_CREDIT_CURRENCY_currency 1 0 BUREAU_CREDIT_CURRENCY_currency 2 0 BUREAU_CREDIT_CURRENCY_currency 3 0 BUREAU_CREDIT_CURRENCY_currency 4 0 BUREAU_CREDIT_TYPE_Another type of loan 0 BUREAU_CREDIT_TYPE_Car loan 0 BUREAU_CREDIT_TYPE_Cash loan (non-earmarked) 0 BUREAU_CREDIT_TYPE_Consumer credit 0 BUREAU_CREDIT_TYPE_Credit card 0 BUREAU_CREDIT_TYPE_Interbank credit 0 BUREAU_CREDIT_TYPE_Loan for business development 0 BUREAU_CREDIT_TYPE_Loan for purchase of shares (margin lending) 0 BUREAU_CREDIT_TYPE_Loan for the purchase of equipment 0 BUREAU_CREDIT_TYPE_Loan for working capital replenishment 0 BUREAU_CREDIT_TYPE_Microloan 0 BUREAU_CREDIT_TYPE_Mobile operator loan 0 BUREAU_CREDIT_TYPE_Mortgage 0 BUREAU_CREDIT_TYPE_Real estate loan 0 BUREAU_CREDIT_TYPE_Unknown type of loan 0 PREV_APP_COUNT 0 PREV_AMT_ANNUITY 0 PREV_AMT_APPLICATION 0 PREV_AMT_CREDIT 0 PREV_AMT_DOWN_PAYMENT 0 PREV_AMT_GOODS_PRICE 0 PREV_HOUR_APPR_PROCESS_START 0 PREV_NFLAG_LAST_APPL_IN_DAY 0 PREV_RATE_DOWN_PAYMENT 0 PREV_RATE_INTEREST_PRIMARY 0 PREV_RATE_INTEREST_PRIVILEGED 0 PREV_DAYS_DECISION 0 PREV_SELLERPLACE_AREA 0 PREV_CNT_PAYMENT 0 PREV_DAYS_FIRST_DRAWING 0 PREV_DAYS_FIRST_DUE 0 PREV_DAYS_LAST_DUE_1ST_VERSION 0 PREV_DAYS_LAST_DUE 0 PREV_DAYS_TERMINATION 0 PREV_NFLAG_INSURED_ON_APPROVAL 0 PREV_NAME_CONTRACT_TYPE_Cash loans 0 PREV_NAME_CONTRACT_TYPE_Consumer loans 0 PREV_NAME_CONTRACT_TYPE_Revolving loans 0 PREV_NAME_CONTRACT_TYPE_XNA 0 PREV_WEEKDAY_APPR_PROCESS_START_FRIDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_MONDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SATURDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SUNDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_THURSDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_TUESDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_WEDNESDAY 0 
PREV_FLAG_LAST_APPL_PER_CONTRACT_N 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_Y 0 PREV_NAME_CASH_LOAN_PURPOSE_Building a house or an annex 0 PREV_NAME_CASH_LOAN_PURPOSE_Business development 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a garage 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a home 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a new car 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a used car 0 PREV_NAME_CASH_LOAN_PURPOSE_Car repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Education 0 PREV_NAME_CASH_LOAN_PURPOSE_Everyday expenses 0 PREV_NAME_CASH_LOAN_PURPOSE_Furniture 0 PREV_NAME_CASH_LOAN_PURPOSE_Gasification / water supply 0 PREV_NAME_CASH_LOAN_PURPOSE_Hobby 0 PREV_NAME_CASH_LOAN_PURPOSE_Journey 0 PREV_NAME_CASH_LOAN_PURPOSE_Medicine 0 PREV_NAME_CASH_LOAN_PURPOSE_Money for a third person 0 PREV_NAME_CASH_LOAN_PURPOSE_Other 0 PREV_NAME_CASH_LOAN_PURPOSE_Payments on other loans 0 PREV_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment 0 PREV_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal 0 PREV_NAME_CASH_LOAN_PURPOSE_Repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Urgent needs 0 PREV_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday 0 PREV_NAME_CASH_LOAN_PURPOSE_XAP 0 PREV_NAME_CASH_LOAN_PURPOSE_XNA 0 PREV_NAME_CONTRACT_STATUS_Approved 0 PREV_NAME_CONTRACT_STATUS_Canceled 0 PREV_NAME_CONTRACT_STATUS_Refused 0 PREV_NAME_CONTRACT_STATUS_Unused offer 0 PREV_NAME_PAYMENT_TYPE_Cash through the bank 0 PREV_NAME_PAYMENT_TYPE_Cashless from the account of the employer 0 PREV_NAME_PAYMENT_TYPE_Non-cash from your account 0 PREV_NAME_PAYMENT_TYPE_XNA 0 PREV_CODE_REJECT_REASON_CLIENT 0 PREV_CODE_REJECT_REASON_HC 0 PREV_CODE_REJECT_REASON_LIMIT 0 PREV_CODE_REJECT_REASON_SCO 0 PREV_CODE_REJECT_REASON_SCOFR 0 PREV_CODE_REJECT_REASON_SYSTEM 0 PREV_CODE_REJECT_REASON_VERIF 0 PREV_CODE_REJECT_REASON_XAP 0 PREV_CODE_REJECT_REASON_XNA 0 PREV_NAME_TYPE_SUITE_Children 0 PREV_NAME_TYPE_SUITE_Family 0 PREV_NAME_TYPE_SUITE_Group of people 0 PREV_NAME_TYPE_SUITE_Other_A 0 
PREV_NAME_TYPE_SUITE_Other_B 0 PREV_NAME_TYPE_SUITE_Spouse, partner 0 PREV_NAME_TYPE_SUITE_Unaccompanied 0 PREV_NAME_CLIENT_TYPE_New 0 PREV_NAME_CLIENT_TYPE_Refreshed 0 PREV_NAME_CLIENT_TYPE_Repeater 0 PREV_NAME_CLIENT_TYPE_XNA 0 PREV_NAME_GOODS_CATEGORY_Additional Service 0 PREV_NAME_GOODS_CATEGORY_Animals 0 PREV_NAME_GOODS_CATEGORY_Audio/Video 0 PREV_NAME_GOODS_CATEGORY_Auto Accessories 0 PREV_NAME_GOODS_CATEGORY_Clothing and Accessories 0 PREV_NAME_GOODS_CATEGORY_Computers 0 PREV_NAME_GOODS_CATEGORY_Construction Materials 0 PREV_NAME_GOODS_CATEGORY_Consumer Electronics 0 PREV_NAME_GOODS_CATEGORY_Direct Sales 0 PREV_NAME_GOODS_CATEGORY_Education 0 PREV_NAME_GOODS_CATEGORY_Fitness 0 PREV_NAME_GOODS_CATEGORY_Furniture 0 PREV_NAME_GOODS_CATEGORY_Gardening 0 PREV_NAME_GOODS_CATEGORY_Homewares 0 PREV_NAME_GOODS_CATEGORY_House Construction 0 PREV_NAME_GOODS_CATEGORY_Insurance 0 PREV_NAME_GOODS_CATEGORY_Jewelry 0 PREV_NAME_GOODS_CATEGORY_Medical Supplies 0 PREV_NAME_GOODS_CATEGORY_Medicine 0 PREV_NAME_GOODS_CATEGORY_Mobile 0 PREV_NAME_GOODS_CATEGORY_Office Appliances 0 PREV_NAME_GOODS_CATEGORY_Other 0 PREV_NAME_GOODS_CATEGORY_Photo / Cinema Equipment 0 PREV_NAME_GOODS_CATEGORY_Sport and Leisure 0 PREV_NAME_GOODS_CATEGORY_Tourism 0 PREV_NAME_GOODS_CATEGORY_Vehicles 0 PREV_NAME_GOODS_CATEGORY_Weapon 0 PREV_NAME_GOODS_CATEGORY_XNA 0 PREV_NAME_PORTFOLIO_Cards 0 PREV_NAME_PORTFOLIO_Cars 0 PREV_NAME_PORTFOLIO_Cash 0 PREV_NAME_PORTFOLIO_POS 0 PREV_NAME_PORTFOLIO_XNA 0 PREV_NAME_PRODUCT_TYPE_XNA 0 PREV_NAME_PRODUCT_TYPE_walk-in 0 PREV_NAME_PRODUCT_TYPE_x-sell 0 PREV_CHANNEL_TYPE_AP+ (Cash loan) 0 PREV_CHANNEL_TYPE_Car dealer 0 PREV_CHANNEL_TYPE_Channel of corporate sales 0 PREV_CHANNEL_TYPE_Contact center 0 PREV_CHANNEL_TYPE_Country-wide 0 PREV_CHANNEL_TYPE_Credit and cash offices 0 PREV_CHANNEL_TYPE_Regional / Local 0 PREV_CHANNEL_TYPE_Stone 0 PREV_NAME_SELLER_INDUSTRY_Auto technology 0 PREV_NAME_SELLER_INDUSTRY_Clothing 0 PREV_NAME_SELLER_INDUSTRY_Connectivity 0 
PREV_NAME_SELLER_INDUSTRY_Construction 0 PREV_NAME_SELLER_INDUSTRY_Consumer electronics 0 PREV_NAME_SELLER_INDUSTRY_Furniture 0 PREV_NAME_SELLER_INDUSTRY_Industry 0 PREV_NAME_SELLER_INDUSTRY_Jewelry 0 PREV_NAME_SELLER_INDUSTRY_MLM partners 0 PREV_NAME_SELLER_INDUSTRY_Tourism 0 PREV_NAME_SELLER_INDUSTRY_XNA 0 PREV_NAME_YIELD_GROUP_XNA 0 PREV_NAME_YIELD_GROUP_high 0 PREV_NAME_YIELD_GROUP_low_action 0 PREV_NAME_YIELD_GROUP_low_normal 0 PREV_NAME_YIELD_GROUP_middle 0 PREV_PRODUCT_COMBINATION_Card Street 0 PREV_PRODUCT_COMBINATION_Card X-Sell 0 PREV_PRODUCT_COMBINATION_Cash 0 PREV_PRODUCT_COMBINATION_Cash Street: high 0 PREV_PRODUCT_COMBINATION_Cash Street: low 0 PREV_PRODUCT_COMBINATION_Cash Street: middle 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: high 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: low 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: middle 0 PREV_PRODUCT_COMBINATION_POS household with interest 0 PREV_PRODUCT_COMBINATION_POS household without interest 0 PREV_PRODUCT_COMBINATION_POS industry with interest 0 PREV_PRODUCT_COMBINATION_POS industry without interest 0 PREV_PRODUCT_COMBINATION_POS mobile with interest 0 PREV_PRODUCT_COMBINATION_POS mobile without interest 0 PREV_PRODUCT_COMBINATION_POS other with interest 0 PREV_PRODUCT_COMBINATION_POS others without interest 0 POS_MONTHS_BALANCE 0 POS_CNT_INSTALMENT 0 POS_CNT_INSTALMENT_FUTURE 0 POS_SK_DPD 0 POS_SK_DPD_DEF 0 POS_NAME_CONTRACT_STATUS_Active 0 POS_NAME_CONTRACT_STATUS_Amortized debt 0 POS_NAME_CONTRACT_STATUS_Approved 0 POS_NAME_CONTRACT_STATUS_Canceled 0 POS_NAME_CONTRACT_STATUS_Completed 0 POS_NAME_CONTRACT_STATUS_Demand 0 POS_NAME_CONTRACT_STATUS_Returned to the store 0 POS_NAME_CONTRACT_STATUS_Signed 0 POS_NAME_CONTRACT_STATUS_XNA 0 INSTA_NUM_INSTALMENT_VERSION 0 INSTA_NUM_INSTALMENT_NUMBER 0 INSTA_DAYS_INSTALMENT 0 INSTA_DAYS_ENTRY_PAYMENT 0 INSTA_AMT_INSTALMENT 0 INSTA_AMT_PAYMENT 0 CREDIT_MONTHS_BALANCE 0 CREDIT_AMT_BALANCE 0 CREDIT_AMT_CREDIT_LIMIT_ACTUAL 0 CREDIT_AMT_DRAWINGS_ATM_CURRENT 0 
CREDIT_AMT_DRAWINGS_CURRENT 0 CREDIT_AMT_DRAWINGS_OTHER_CURRENT 0 CREDIT_AMT_DRAWINGS_POS_CURRENT 0 CREDIT_AMT_INST_MIN_REGULARITY 0 CREDIT_AMT_PAYMENT_CURRENT 0 CREDIT_AMT_PAYMENT_TOTAL_CURRENT 0 CREDIT_AMT_RECEIVABLE_PRINCIPAL 0 CREDIT_AMT_RECIVABLE 0 CREDIT_AMT_TOTAL_RECEIVABLE 0 CREDIT_CNT_DRAWINGS_ATM_CURRENT 0 CREDIT_CNT_DRAWINGS_CURRENT 0 CREDIT_CNT_DRAWINGS_OTHER_CURRENT 0 CREDIT_CNT_DRAWINGS_POS_CURRENT 0 CREDIT_CNT_INSTALMENT_MATURE_CUM 0 CREDIT_SK_DPD 0 CREDIT_SK_DPD_DEF 0 CREDIT_NAME_CONTRACT_STATUS_Active 0 CREDIT_NAME_CONTRACT_STATUS_Approved 0 CREDIT_NAME_CONTRACT_STATUS_Completed 0 CREDIT_NAME_CONTRACT_STATUS_Demand 0 CREDIT_NAME_CONTRACT_STATUS_Refused 0 CREDIT_NAME_CONTRACT_STATUS_Sent proposal 0 CREDIT_NAME_CONTRACT_STATUS_Signed 0
# Fill the missing values of these numeric columns with the column median
# taken from app_train.
# The result is assigned back to the column: calling
# fillna(..., inplace=True) on app_bure_prev[col] is a chained in-place update,
# which pandas deprecates and which leaves the parent frame unchanged once
# Copy-on-Write is active.
for col in [
    "AMT_REQ_CREDIT_BUREAU_HOUR",
    "AMT_REQ_CREDIT_BUREAU_DAY",
    "AMT_REQ_CREDIT_BUREAU_WEEK",
    "AMT_REQ_CREDIT_BUREAU_MON",
    "AMT_REQ_CREDIT_BUREAU_QRT",
    "AMT_REQ_CREDIT_BUREAU_YEAR",
    "EXT_SOURCE_3",
    "OWN_CAR_AGE",
]:
    app_bure_prev[col] = app_bure_prev[col].fillna(app_train[col].median())
# Re-check the missing-value count of every column after the median fill,
# printing one "name count" pair per line. The counts are computed in one
# isnull().sum() pass over the frame.
for col, n_missing in app_bure_prev.isnull().sum().items():
    print(col, n_missing)
SK_ID_CURR 0 TARGET 0 NAME_CONTRACT_TYPE 0 CODE_GENDER 0 FLAG_OWN_CAR 0 FLAG_OWN_REALTY 0 CNT_CHILDREN 0 AMT_INCOME_TOTAL 0 AMT_CREDIT 0 AMT_ANNUITY 0 AMT_GOODS_PRICE 0 NAME_TYPE_SUITE 0 NAME_INCOME_TYPE 0 NAME_EDUCATION_TYPE 0 NAME_FAMILY_STATUS 0 NAME_HOUSING_TYPE 0 REGION_POPULATION_RELATIVE 0 DAYS_BIRTH 0 DAYS_EMPLOYED 0 DAYS_REGISTRATION 0 DAYS_ID_PUBLISH 0 OWN_CAR_AGE 0 FLAG_MOBIL 0 FLAG_EMP_PHONE 0 FLAG_WORK_PHONE 0 FLAG_CONT_MOBILE 0 FLAG_PHONE 0 FLAG_EMAIL 0 OCCUPATION_TYPE 95435 CNT_FAM_MEMBERS 0 REGION_RATING_CLIENT 0 REGION_RATING_CLIENT_W_CITY 0 WEEKDAY_APPR_PROCESS_START 0 HOUR_APPR_PROCESS_START 0 REG_REGION_NOT_LIVE_REGION 0 REG_REGION_NOT_WORK_REGION 0 LIVE_REGION_NOT_WORK_REGION 0 REG_CITY_NOT_LIVE_CITY 0 REG_CITY_NOT_WORK_CITY 0 LIVE_CITY_NOT_WORK_CITY 0 ORGANIZATION_TYPE 0 EXT_SOURCE_2 0 EXT_SOURCE_3 0 YEARS_BEGINEXPLUATATION_AVG 148686 YEARS_BEGINEXPLUATATION_MODE 148686 YEARS_BEGINEXPLUATATION_MEDI 148686 TOTALAREA_MODE 147132 EMERGENCYSTATE_MODE 144475 OBS_30_CNT_SOCIAL_CIRCLE 0 DEF_30_CNT_SOCIAL_CIRCLE 0 OBS_60_CNT_SOCIAL_CIRCLE 0 DEF_60_CNT_SOCIAL_CIRCLE 0 DAYS_LAST_PHONE_CHANGE 0 FLAG_DOCUMENT_2 0 FLAG_DOCUMENT_3 0 FLAG_DOCUMENT_4 0 FLAG_DOCUMENT_5 0 FLAG_DOCUMENT_6 0 FLAG_DOCUMENT_7 0 FLAG_DOCUMENT_8 0 FLAG_DOCUMENT_9 0 FLAG_DOCUMENT_10 0 FLAG_DOCUMENT_11 0 FLAG_DOCUMENT_12 0 FLAG_DOCUMENT_13 0 FLAG_DOCUMENT_14 0 FLAG_DOCUMENT_15 0 FLAG_DOCUMENT_16 0 FLAG_DOCUMENT_17 0 FLAG_DOCUMENT_18 0 FLAG_DOCUMENT_19 0 FLAG_DOCUMENT_20 0 FLAG_DOCUMENT_21 0 AMT_REQ_CREDIT_BUREAU_HOUR 0 AMT_REQ_CREDIT_BUREAU_DAY 0 AMT_REQ_CREDIT_BUREAU_WEEK 0 AMT_REQ_CREDIT_BUREAU_MON 0 AMT_REQ_CREDIT_BUREAU_QRT 0 AMT_REQ_CREDIT_BUREAU_YEAR 0 INCOME_GT_CREDIT_FLAG 0 CREDIT_INCOME_PERCENT 0 ANNUITY_INCOME_PERCENT 0 CREDIT_TERM 0 DAYS_EMPLOYED_PERCENT 0 BUREAU_DAYS_CREDIT 0 BUREAU_CREDIT_DAY_OVERDUE 0 BUREAU_DAYS_CREDIT_ENDDATE 0 BUREAU_DAYS_ENDDATE_FACT 0 BUREAU_AMT_CREDIT_MAX_OVERDUE 0 BUREAU_CNT_CREDIT_PROLONG 0 BUREAU_AMT_CREDIT_SUM 0 BUREAU_AMT_CREDIT_SUM_DEBT 0 
BUREAU_AMT_CREDIT_SUM_LIMIT 0 BUREAU_AMT_CREDIT_SUM_OVERDUE 0 BUREAU_DAYS_CREDIT_UPDATE 0 BUREAU_AMT_ANNUITY 0 BUREAU_CREDIT_ACTIVE_Active 0 BUREAU_CREDIT_ACTIVE_Bad debt 0 BUREAU_CREDIT_ACTIVE_Closed 0 BUREAU_CREDIT_ACTIVE_Sold 0 BUREAU_CREDIT_CURRENCY_currency 1 0 BUREAU_CREDIT_CURRENCY_currency 2 0 BUREAU_CREDIT_CURRENCY_currency 3 0 BUREAU_CREDIT_CURRENCY_currency 4 0 BUREAU_CREDIT_TYPE_Another type of loan 0 BUREAU_CREDIT_TYPE_Car loan 0 BUREAU_CREDIT_TYPE_Cash loan (non-earmarked) 0 BUREAU_CREDIT_TYPE_Consumer credit 0 BUREAU_CREDIT_TYPE_Credit card 0 BUREAU_CREDIT_TYPE_Interbank credit 0 BUREAU_CREDIT_TYPE_Loan for business development 0 BUREAU_CREDIT_TYPE_Loan for purchase of shares (margin lending) 0 BUREAU_CREDIT_TYPE_Loan for the purchase of equipment 0 BUREAU_CREDIT_TYPE_Loan for working capital replenishment 0 BUREAU_CREDIT_TYPE_Microloan 0 BUREAU_CREDIT_TYPE_Mobile operator loan 0 BUREAU_CREDIT_TYPE_Mortgage 0 BUREAU_CREDIT_TYPE_Real estate loan 0 BUREAU_CREDIT_TYPE_Unknown type of loan 0 PREV_APP_COUNT 0 PREV_AMT_ANNUITY 0 PREV_AMT_APPLICATION 0 PREV_AMT_CREDIT 0 PREV_AMT_DOWN_PAYMENT 0 PREV_AMT_GOODS_PRICE 0 PREV_HOUR_APPR_PROCESS_START 0 PREV_NFLAG_LAST_APPL_IN_DAY 0 PREV_RATE_DOWN_PAYMENT 0 PREV_RATE_INTEREST_PRIMARY 0 PREV_RATE_INTEREST_PRIVILEGED 0 PREV_DAYS_DECISION 0 PREV_SELLERPLACE_AREA 0 PREV_CNT_PAYMENT 0 PREV_DAYS_FIRST_DRAWING 0 PREV_DAYS_FIRST_DUE 0 PREV_DAYS_LAST_DUE_1ST_VERSION 0 PREV_DAYS_LAST_DUE 0 PREV_DAYS_TERMINATION 0 PREV_NFLAG_INSURED_ON_APPROVAL 0 PREV_NAME_CONTRACT_TYPE_Cash loans 0 PREV_NAME_CONTRACT_TYPE_Consumer loans 0 PREV_NAME_CONTRACT_TYPE_Revolving loans 0 PREV_NAME_CONTRACT_TYPE_XNA 0 PREV_WEEKDAY_APPR_PROCESS_START_FRIDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_MONDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SATURDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SUNDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_THURSDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_TUESDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_WEDNESDAY 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_N 0 
PREV_FLAG_LAST_APPL_PER_CONTRACT_Y 0 PREV_NAME_CASH_LOAN_PURPOSE_Building a house or an annex 0 PREV_NAME_CASH_LOAN_PURPOSE_Business development 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a garage 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a home 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a new car 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a used car 0 PREV_NAME_CASH_LOAN_PURPOSE_Car repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Education 0 PREV_NAME_CASH_LOAN_PURPOSE_Everyday expenses 0 PREV_NAME_CASH_LOAN_PURPOSE_Furniture 0 PREV_NAME_CASH_LOAN_PURPOSE_Gasification / water supply 0 PREV_NAME_CASH_LOAN_PURPOSE_Hobby 0 PREV_NAME_CASH_LOAN_PURPOSE_Journey 0 PREV_NAME_CASH_LOAN_PURPOSE_Medicine 0 PREV_NAME_CASH_LOAN_PURPOSE_Money for a third person 0 PREV_NAME_CASH_LOAN_PURPOSE_Other 0 PREV_NAME_CASH_LOAN_PURPOSE_Payments on other loans 0 PREV_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment 0 PREV_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal 0 PREV_NAME_CASH_LOAN_PURPOSE_Repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Urgent needs 0 PREV_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday 0 PREV_NAME_CASH_LOAN_PURPOSE_XAP 0 PREV_NAME_CASH_LOAN_PURPOSE_XNA 0 PREV_NAME_CONTRACT_STATUS_Approved 0 PREV_NAME_CONTRACT_STATUS_Canceled 0 PREV_NAME_CONTRACT_STATUS_Refused 0 PREV_NAME_CONTRACT_STATUS_Unused offer 0 PREV_NAME_PAYMENT_TYPE_Cash through the bank 0 PREV_NAME_PAYMENT_TYPE_Cashless from the account of the employer 0 PREV_NAME_PAYMENT_TYPE_Non-cash from your account 0 PREV_NAME_PAYMENT_TYPE_XNA 0 PREV_CODE_REJECT_REASON_CLIENT 0 PREV_CODE_REJECT_REASON_HC 0 PREV_CODE_REJECT_REASON_LIMIT 0 PREV_CODE_REJECT_REASON_SCO 0 PREV_CODE_REJECT_REASON_SCOFR 0 PREV_CODE_REJECT_REASON_SYSTEM 0 PREV_CODE_REJECT_REASON_VERIF 0 PREV_CODE_REJECT_REASON_XAP 0 PREV_CODE_REJECT_REASON_XNA 0 PREV_NAME_TYPE_SUITE_Children 0 PREV_NAME_TYPE_SUITE_Family 0 PREV_NAME_TYPE_SUITE_Group of people 0 PREV_NAME_TYPE_SUITE_Other_A 0 PREV_NAME_TYPE_SUITE_Other_B 0 
PREV_NAME_TYPE_SUITE_Spouse, partner 0 PREV_NAME_TYPE_SUITE_Unaccompanied 0 PREV_NAME_CLIENT_TYPE_New 0 PREV_NAME_CLIENT_TYPE_Refreshed 0 PREV_NAME_CLIENT_TYPE_Repeater 0 PREV_NAME_CLIENT_TYPE_XNA 0 PREV_NAME_GOODS_CATEGORY_Additional Service 0 PREV_NAME_GOODS_CATEGORY_Animals 0 PREV_NAME_GOODS_CATEGORY_Audio/Video 0 PREV_NAME_GOODS_CATEGORY_Auto Accessories 0 PREV_NAME_GOODS_CATEGORY_Clothing and Accessories 0 PREV_NAME_GOODS_CATEGORY_Computers 0 PREV_NAME_GOODS_CATEGORY_Construction Materials 0 PREV_NAME_GOODS_CATEGORY_Consumer Electronics 0 PREV_NAME_GOODS_CATEGORY_Direct Sales 0 PREV_NAME_GOODS_CATEGORY_Education 0 PREV_NAME_GOODS_CATEGORY_Fitness 0 PREV_NAME_GOODS_CATEGORY_Furniture 0 PREV_NAME_GOODS_CATEGORY_Gardening 0 PREV_NAME_GOODS_CATEGORY_Homewares 0 PREV_NAME_GOODS_CATEGORY_House Construction 0 PREV_NAME_GOODS_CATEGORY_Insurance 0 PREV_NAME_GOODS_CATEGORY_Jewelry 0 PREV_NAME_GOODS_CATEGORY_Medical Supplies 0 PREV_NAME_GOODS_CATEGORY_Medicine 0 PREV_NAME_GOODS_CATEGORY_Mobile 0 PREV_NAME_GOODS_CATEGORY_Office Appliances 0 PREV_NAME_GOODS_CATEGORY_Other 0 PREV_NAME_GOODS_CATEGORY_Photo / Cinema Equipment 0 PREV_NAME_GOODS_CATEGORY_Sport and Leisure 0 PREV_NAME_GOODS_CATEGORY_Tourism 0 PREV_NAME_GOODS_CATEGORY_Vehicles 0 PREV_NAME_GOODS_CATEGORY_Weapon 0 PREV_NAME_GOODS_CATEGORY_XNA 0 PREV_NAME_PORTFOLIO_Cards 0 PREV_NAME_PORTFOLIO_Cars 0 PREV_NAME_PORTFOLIO_Cash 0 PREV_NAME_PORTFOLIO_POS 0 PREV_NAME_PORTFOLIO_XNA 0 PREV_NAME_PRODUCT_TYPE_XNA 0 PREV_NAME_PRODUCT_TYPE_walk-in 0 PREV_NAME_PRODUCT_TYPE_x-sell 0 PREV_CHANNEL_TYPE_AP+ (Cash loan) 0 PREV_CHANNEL_TYPE_Car dealer 0 PREV_CHANNEL_TYPE_Channel of corporate sales 0 PREV_CHANNEL_TYPE_Contact center 0 PREV_CHANNEL_TYPE_Country-wide 0 PREV_CHANNEL_TYPE_Credit and cash offices 0 PREV_CHANNEL_TYPE_Regional / Local 0 PREV_CHANNEL_TYPE_Stone 0 PREV_NAME_SELLER_INDUSTRY_Auto technology 0 PREV_NAME_SELLER_INDUSTRY_Clothing 0 PREV_NAME_SELLER_INDUSTRY_Connectivity 0 PREV_NAME_SELLER_INDUSTRY_Construction 0 
PREV_NAME_SELLER_INDUSTRY_Consumer electronics 0 PREV_NAME_SELLER_INDUSTRY_Furniture 0 PREV_NAME_SELLER_INDUSTRY_Industry 0 PREV_NAME_SELLER_INDUSTRY_Jewelry 0 PREV_NAME_SELLER_INDUSTRY_MLM partners 0 PREV_NAME_SELLER_INDUSTRY_Tourism 0 PREV_NAME_SELLER_INDUSTRY_XNA 0 PREV_NAME_YIELD_GROUP_XNA 0 PREV_NAME_YIELD_GROUP_high 0 PREV_NAME_YIELD_GROUP_low_action 0 PREV_NAME_YIELD_GROUP_low_normal 0 PREV_NAME_YIELD_GROUP_middle 0 PREV_PRODUCT_COMBINATION_Card Street 0 PREV_PRODUCT_COMBINATION_Card X-Sell 0 PREV_PRODUCT_COMBINATION_Cash 0 PREV_PRODUCT_COMBINATION_Cash Street: high 0 PREV_PRODUCT_COMBINATION_Cash Street: low 0 PREV_PRODUCT_COMBINATION_Cash Street: middle 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: high 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: low 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: middle 0 PREV_PRODUCT_COMBINATION_POS household with interest 0 PREV_PRODUCT_COMBINATION_POS household without interest 0 PREV_PRODUCT_COMBINATION_POS industry with interest 0 PREV_PRODUCT_COMBINATION_POS industry without interest 0 PREV_PRODUCT_COMBINATION_POS mobile with interest 0 PREV_PRODUCT_COMBINATION_POS mobile without interest 0 PREV_PRODUCT_COMBINATION_POS other with interest 0 PREV_PRODUCT_COMBINATION_POS others without interest 0 POS_MONTHS_BALANCE 0 POS_CNT_INSTALMENT 0 POS_CNT_INSTALMENT_FUTURE 0 POS_SK_DPD 0 POS_SK_DPD_DEF 0 POS_NAME_CONTRACT_STATUS_Active 0 POS_NAME_CONTRACT_STATUS_Amortized debt 0 POS_NAME_CONTRACT_STATUS_Approved 0 POS_NAME_CONTRACT_STATUS_Canceled 0 POS_NAME_CONTRACT_STATUS_Completed 0 POS_NAME_CONTRACT_STATUS_Demand 0 POS_NAME_CONTRACT_STATUS_Returned to the store 0 POS_NAME_CONTRACT_STATUS_Signed 0 POS_NAME_CONTRACT_STATUS_XNA 0 INSTA_NUM_INSTALMENT_VERSION 0 INSTA_NUM_INSTALMENT_NUMBER 0 INSTA_DAYS_INSTALMENT 0 INSTA_DAYS_ENTRY_PAYMENT 0 INSTA_AMT_INSTALMENT 0 INSTA_AMT_PAYMENT 0 CREDIT_MONTHS_BALANCE 0 CREDIT_AMT_BALANCE 0 CREDIT_AMT_CREDIT_LIMIT_ACTUAL 0 CREDIT_AMT_DRAWINGS_ATM_CURRENT 0 CREDIT_AMT_DRAWINGS_CURRENT 0 
CREDIT_AMT_DRAWINGS_OTHER_CURRENT 0 CREDIT_AMT_DRAWINGS_POS_CURRENT 0 CREDIT_AMT_INST_MIN_REGULARITY 0 CREDIT_AMT_PAYMENT_CURRENT 0 CREDIT_AMT_PAYMENT_TOTAL_CURRENT 0 CREDIT_AMT_RECEIVABLE_PRINCIPAL 0 CREDIT_AMT_RECIVABLE 0 CREDIT_AMT_TOTAL_RECEIVABLE 0 CREDIT_CNT_DRAWINGS_ATM_CURRENT 0 CREDIT_CNT_DRAWINGS_CURRENT 0 CREDIT_CNT_DRAWINGS_OTHER_CURRENT 0 CREDIT_CNT_DRAWINGS_POS_CURRENT 0 CREDIT_CNT_INSTALMENT_MATURE_CUM 0 CREDIT_SK_DPD 0 CREDIT_SK_DPD_DEF 0 CREDIT_NAME_CONTRACT_STATUS_Active 0 CREDIT_NAME_CONTRACT_STATUS_Approved 0 CREDIT_NAME_CONTRACT_STATUS_Completed 0 CREDIT_NAME_CONTRACT_STATUS_Demand 0 CREDIT_NAME_CONTRACT_STATUS_Refused 0 CREDIT_NAME_CONTRACT_STATUS_Sent proposal 0 CREDIT_NAME_CONTRACT_STATUS_Signed 0
# Fill the remaining gaps in these columns with each column's most frequent
# value (its mode).
# The result is assigned back to the frame instead of calling
# fillna(inplace=True) on a column selection: that form is chained
# assignment, which raises a FutureWarning in pandas 2.x and leaves the
# frame unchanged once Copy-on-Write is active.
mode_imputed_columns = [
    "YEARS_BEGINEXPLUATATION_AVG",
    "YEARS_BEGINEXPLUATATION_MODE",
    "YEARS_BEGINEXPLUATATION_MEDI",
    "TOTALAREA_MODE",
    "OCCUPATION_TYPE",
    "EMERGENCYSTATE_MODE",
]
for col in mode_imputed_columns:
    # mode() returns a Series sorted by value; [0] picks the first mode.
    app_bure_prev[col] = app_bure_prev[col].fillna(app_bure_prev[col].mode()[0])

# Print the number of missing values left in every column.
for col in app_bure_prev.columns:
    print(col, app_bure_prev[col].isnull().sum())
SK_ID_CURR 0 TARGET 0 NAME_CONTRACT_TYPE 0 CODE_GENDER 0 FLAG_OWN_CAR 0 FLAG_OWN_REALTY 0 CNT_CHILDREN 0 AMT_INCOME_TOTAL 0 AMT_CREDIT 0 AMT_ANNUITY 0 AMT_GOODS_PRICE 0 NAME_TYPE_SUITE 0 NAME_INCOME_TYPE 0 NAME_EDUCATION_TYPE 0 NAME_FAMILY_STATUS 0 NAME_HOUSING_TYPE 0 REGION_POPULATION_RELATIVE 0 DAYS_BIRTH 0 DAYS_EMPLOYED 0 DAYS_REGISTRATION 0 DAYS_ID_PUBLISH 0 OWN_CAR_AGE 0 FLAG_MOBIL 0 FLAG_EMP_PHONE 0 FLAG_WORK_PHONE 0 FLAG_CONT_MOBILE 0 FLAG_PHONE 0 FLAG_EMAIL 0 OCCUPATION_TYPE 0 CNT_FAM_MEMBERS 0 REGION_RATING_CLIENT 0 REGION_RATING_CLIENT_W_CITY 0 WEEKDAY_APPR_PROCESS_START 0 HOUR_APPR_PROCESS_START 0 REG_REGION_NOT_LIVE_REGION 0 REG_REGION_NOT_WORK_REGION 0 LIVE_REGION_NOT_WORK_REGION 0 REG_CITY_NOT_LIVE_CITY 0 REG_CITY_NOT_WORK_CITY 0 LIVE_CITY_NOT_WORK_CITY 0 ORGANIZATION_TYPE 0 EXT_SOURCE_2 0 EXT_SOURCE_3 0 YEARS_BEGINEXPLUATATION_AVG 0 YEARS_BEGINEXPLUATATION_MODE 0 YEARS_BEGINEXPLUATATION_MEDI 0 TOTALAREA_MODE 0 EMERGENCYSTATE_MODE 0 OBS_30_CNT_SOCIAL_CIRCLE 0 DEF_30_CNT_SOCIAL_CIRCLE 0 OBS_60_CNT_SOCIAL_CIRCLE 0 DEF_60_CNT_SOCIAL_CIRCLE 0 DAYS_LAST_PHONE_CHANGE 0 FLAG_DOCUMENT_2 0 FLAG_DOCUMENT_3 0 FLAG_DOCUMENT_4 0 FLAG_DOCUMENT_5 0 FLAG_DOCUMENT_6 0 FLAG_DOCUMENT_7 0 FLAG_DOCUMENT_8 0 FLAG_DOCUMENT_9 0 FLAG_DOCUMENT_10 0 FLAG_DOCUMENT_11 0 FLAG_DOCUMENT_12 0 FLAG_DOCUMENT_13 0 FLAG_DOCUMENT_14 0 FLAG_DOCUMENT_15 0 FLAG_DOCUMENT_16 0 FLAG_DOCUMENT_17 0 FLAG_DOCUMENT_18 0 FLAG_DOCUMENT_19 0 FLAG_DOCUMENT_20 0 FLAG_DOCUMENT_21 0 AMT_REQ_CREDIT_BUREAU_HOUR 0 AMT_REQ_CREDIT_BUREAU_DAY 0 AMT_REQ_CREDIT_BUREAU_WEEK 0 AMT_REQ_CREDIT_BUREAU_MON 0 AMT_REQ_CREDIT_BUREAU_QRT 0 AMT_REQ_CREDIT_BUREAU_YEAR 0 INCOME_GT_CREDIT_FLAG 0 CREDIT_INCOME_PERCENT 0 ANNUITY_INCOME_PERCENT 0 CREDIT_TERM 0 DAYS_EMPLOYED_PERCENT 0 BUREAU_DAYS_CREDIT 0 BUREAU_CREDIT_DAY_OVERDUE 0 BUREAU_DAYS_CREDIT_ENDDATE 0 BUREAU_DAYS_ENDDATE_FACT 0 BUREAU_AMT_CREDIT_MAX_OVERDUE 0 BUREAU_CNT_CREDIT_PROLONG 0 BUREAU_AMT_CREDIT_SUM 0 BUREAU_AMT_CREDIT_SUM_DEBT 0 BUREAU_AMT_CREDIT_SUM_LIMIT 0 
BUREAU_AMT_CREDIT_SUM_OVERDUE 0 BUREAU_DAYS_CREDIT_UPDATE 0 BUREAU_AMT_ANNUITY 0 BUREAU_CREDIT_ACTIVE_Active 0 BUREAU_CREDIT_ACTIVE_Bad debt 0 BUREAU_CREDIT_ACTIVE_Closed 0 BUREAU_CREDIT_ACTIVE_Sold 0 BUREAU_CREDIT_CURRENCY_currency 1 0 BUREAU_CREDIT_CURRENCY_currency 2 0 BUREAU_CREDIT_CURRENCY_currency 3 0 BUREAU_CREDIT_CURRENCY_currency 4 0 BUREAU_CREDIT_TYPE_Another type of loan 0 BUREAU_CREDIT_TYPE_Car loan 0 BUREAU_CREDIT_TYPE_Cash loan (non-earmarked) 0 BUREAU_CREDIT_TYPE_Consumer credit 0 BUREAU_CREDIT_TYPE_Credit card 0 BUREAU_CREDIT_TYPE_Interbank credit 0 BUREAU_CREDIT_TYPE_Loan for business development 0 BUREAU_CREDIT_TYPE_Loan for purchase of shares (margin lending) 0 BUREAU_CREDIT_TYPE_Loan for the purchase of equipment 0 BUREAU_CREDIT_TYPE_Loan for working capital replenishment 0 BUREAU_CREDIT_TYPE_Microloan 0 BUREAU_CREDIT_TYPE_Mobile operator loan 0 BUREAU_CREDIT_TYPE_Mortgage 0 BUREAU_CREDIT_TYPE_Real estate loan 0 BUREAU_CREDIT_TYPE_Unknown type of loan 0 PREV_APP_COUNT 0 PREV_AMT_ANNUITY 0 PREV_AMT_APPLICATION 0 PREV_AMT_CREDIT 0 PREV_AMT_DOWN_PAYMENT 0 PREV_AMT_GOODS_PRICE 0 PREV_HOUR_APPR_PROCESS_START 0 PREV_NFLAG_LAST_APPL_IN_DAY 0 PREV_RATE_DOWN_PAYMENT 0 PREV_RATE_INTEREST_PRIMARY 0 PREV_RATE_INTEREST_PRIVILEGED 0 PREV_DAYS_DECISION 0 PREV_SELLERPLACE_AREA 0 PREV_CNT_PAYMENT 0 PREV_DAYS_FIRST_DRAWING 0 PREV_DAYS_FIRST_DUE 0 PREV_DAYS_LAST_DUE_1ST_VERSION 0 PREV_DAYS_LAST_DUE 0 PREV_DAYS_TERMINATION 0 PREV_NFLAG_INSURED_ON_APPROVAL 0 PREV_NAME_CONTRACT_TYPE_Cash loans 0 PREV_NAME_CONTRACT_TYPE_Consumer loans 0 PREV_NAME_CONTRACT_TYPE_Revolving loans 0 PREV_NAME_CONTRACT_TYPE_XNA 0 PREV_WEEKDAY_APPR_PROCESS_START_FRIDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_MONDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SATURDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_SUNDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_THURSDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_TUESDAY 0 PREV_WEEKDAY_APPR_PROCESS_START_WEDNESDAY 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_N 0 PREV_FLAG_LAST_APPL_PER_CONTRACT_Y 
0 PREV_NAME_CASH_LOAN_PURPOSE_Building a house or an annex 0 PREV_NAME_CASH_LOAN_PURPOSE_Business development 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a garage 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a home 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a new car 0 PREV_NAME_CASH_LOAN_PURPOSE_Buying a used car 0 PREV_NAME_CASH_LOAN_PURPOSE_Car repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Education 0 PREV_NAME_CASH_LOAN_PURPOSE_Everyday expenses 0 PREV_NAME_CASH_LOAN_PURPOSE_Furniture 0 PREV_NAME_CASH_LOAN_PURPOSE_Gasification / water supply 0 PREV_NAME_CASH_LOAN_PURPOSE_Hobby 0 PREV_NAME_CASH_LOAN_PURPOSE_Journey 0 PREV_NAME_CASH_LOAN_PURPOSE_Medicine 0 PREV_NAME_CASH_LOAN_PURPOSE_Money for a third person 0 PREV_NAME_CASH_LOAN_PURPOSE_Other 0 PREV_NAME_CASH_LOAN_PURPOSE_Payments on other loans 0 PREV_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment 0 PREV_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal 0 PREV_NAME_CASH_LOAN_PURPOSE_Repairs 0 PREV_NAME_CASH_LOAN_PURPOSE_Urgent needs 0 PREV_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday 0 PREV_NAME_CASH_LOAN_PURPOSE_XAP 0 PREV_NAME_CASH_LOAN_PURPOSE_XNA 0 PREV_NAME_CONTRACT_STATUS_Approved 0 PREV_NAME_CONTRACT_STATUS_Canceled 0 PREV_NAME_CONTRACT_STATUS_Refused 0 PREV_NAME_CONTRACT_STATUS_Unused offer 0 PREV_NAME_PAYMENT_TYPE_Cash through the bank 0 PREV_NAME_PAYMENT_TYPE_Cashless from the account of the employer 0 PREV_NAME_PAYMENT_TYPE_Non-cash from your account 0 PREV_NAME_PAYMENT_TYPE_XNA 0 PREV_CODE_REJECT_REASON_CLIENT 0 PREV_CODE_REJECT_REASON_HC 0 PREV_CODE_REJECT_REASON_LIMIT 0 PREV_CODE_REJECT_REASON_SCO 0 PREV_CODE_REJECT_REASON_SCOFR 0 PREV_CODE_REJECT_REASON_SYSTEM 0 PREV_CODE_REJECT_REASON_VERIF 0 PREV_CODE_REJECT_REASON_XAP 0 PREV_CODE_REJECT_REASON_XNA 0 PREV_NAME_TYPE_SUITE_Children 0 PREV_NAME_TYPE_SUITE_Family 0 PREV_NAME_TYPE_SUITE_Group of people 0 PREV_NAME_TYPE_SUITE_Other_A 0 PREV_NAME_TYPE_SUITE_Other_B 0 PREV_NAME_TYPE_SUITE_Spouse, partner 0 
PREV_NAME_TYPE_SUITE_Unaccompanied 0 PREV_NAME_CLIENT_TYPE_New 0 PREV_NAME_CLIENT_TYPE_Refreshed 0 PREV_NAME_CLIENT_TYPE_Repeater 0 PREV_NAME_CLIENT_TYPE_XNA 0 PREV_NAME_GOODS_CATEGORY_Additional Service 0 PREV_NAME_GOODS_CATEGORY_Animals 0 PREV_NAME_GOODS_CATEGORY_Audio/Video 0 PREV_NAME_GOODS_CATEGORY_Auto Accessories 0 PREV_NAME_GOODS_CATEGORY_Clothing and Accessories 0 PREV_NAME_GOODS_CATEGORY_Computers 0 PREV_NAME_GOODS_CATEGORY_Construction Materials 0 PREV_NAME_GOODS_CATEGORY_Consumer Electronics 0 PREV_NAME_GOODS_CATEGORY_Direct Sales 0 PREV_NAME_GOODS_CATEGORY_Education 0 PREV_NAME_GOODS_CATEGORY_Fitness 0 PREV_NAME_GOODS_CATEGORY_Furniture 0 PREV_NAME_GOODS_CATEGORY_Gardening 0 PREV_NAME_GOODS_CATEGORY_Homewares 0 PREV_NAME_GOODS_CATEGORY_House Construction 0 PREV_NAME_GOODS_CATEGORY_Insurance 0 PREV_NAME_GOODS_CATEGORY_Jewelry 0 PREV_NAME_GOODS_CATEGORY_Medical Supplies 0 PREV_NAME_GOODS_CATEGORY_Medicine 0 PREV_NAME_GOODS_CATEGORY_Mobile 0 PREV_NAME_GOODS_CATEGORY_Office Appliances 0 PREV_NAME_GOODS_CATEGORY_Other 0 PREV_NAME_GOODS_CATEGORY_Photo / Cinema Equipment 0 PREV_NAME_GOODS_CATEGORY_Sport and Leisure 0 PREV_NAME_GOODS_CATEGORY_Tourism 0 PREV_NAME_GOODS_CATEGORY_Vehicles 0 PREV_NAME_GOODS_CATEGORY_Weapon 0 PREV_NAME_GOODS_CATEGORY_XNA 0 PREV_NAME_PORTFOLIO_Cards 0 PREV_NAME_PORTFOLIO_Cars 0 PREV_NAME_PORTFOLIO_Cash 0 PREV_NAME_PORTFOLIO_POS 0 PREV_NAME_PORTFOLIO_XNA 0 PREV_NAME_PRODUCT_TYPE_XNA 0 PREV_NAME_PRODUCT_TYPE_walk-in 0 PREV_NAME_PRODUCT_TYPE_x-sell 0 PREV_CHANNEL_TYPE_AP+ (Cash loan) 0 PREV_CHANNEL_TYPE_Car dealer 0 PREV_CHANNEL_TYPE_Channel of corporate sales 0 PREV_CHANNEL_TYPE_Contact center 0 PREV_CHANNEL_TYPE_Country-wide 0 PREV_CHANNEL_TYPE_Credit and cash offices 0 PREV_CHANNEL_TYPE_Regional / Local 0 PREV_CHANNEL_TYPE_Stone 0 PREV_NAME_SELLER_INDUSTRY_Auto technology 0 PREV_NAME_SELLER_INDUSTRY_Clothing 0 PREV_NAME_SELLER_INDUSTRY_Connectivity 0 PREV_NAME_SELLER_INDUSTRY_Construction 0 PREV_NAME_SELLER_INDUSTRY_Consumer 
electronics 0 PREV_NAME_SELLER_INDUSTRY_Furniture 0 PREV_NAME_SELLER_INDUSTRY_Industry 0 PREV_NAME_SELLER_INDUSTRY_Jewelry 0 PREV_NAME_SELLER_INDUSTRY_MLM partners 0 PREV_NAME_SELLER_INDUSTRY_Tourism 0 PREV_NAME_SELLER_INDUSTRY_XNA 0 PREV_NAME_YIELD_GROUP_XNA 0 PREV_NAME_YIELD_GROUP_high 0 PREV_NAME_YIELD_GROUP_low_action 0 PREV_NAME_YIELD_GROUP_low_normal 0 PREV_NAME_YIELD_GROUP_middle 0 PREV_PRODUCT_COMBINATION_Card Street 0 PREV_PRODUCT_COMBINATION_Card X-Sell 0 PREV_PRODUCT_COMBINATION_Cash 0 PREV_PRODUCT_COMBINATION_Cash Street: high 0 PREV_PRODUCT_COMBINATION_Cash Street: low 0 PREV_PRODUCT_COMBINATION_Cash Street: middle 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: high 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: low 0 PREV_PRODUCT_COMBINATION_Cash X-Sell: middle 0 PREV_PRODUCT_COMBINATION_POS household with interest 0 PREV_PRODUCT_COMBINATION_POS household without interest 0 PREV_PRODUCT_COMBINATION_POS industry with interest 0 PREV_PRODUCT_COMBINATION_POS industry without interest 0 PREV_PRODUCT_COMBINATION_POS mobile with interest 0 PREV_PRODUCT_COMBINATION_POS mobile without interest 0 PREV_PRODUCT_COMBINATION_POS other with interest 0 PREV_PRODUCT_COMBINATION_POS others without interest 0 POS_MONTHS_BALANCE 0 POS_CNT_INSTALMENT 0 POS_CNT_INSTALMENT_FUTURE 0 POS_SK_DPD 0 POS_SK_DPD_DEF 0 POS_NAME_CONTRACT_STATUS_Active 0 POS_NAME_CONTRACT_STATUS_Amortized debt 0 POS_NAME_CONTRACT_STATUS_Approved 0 POS_NAME_CONTRACT_STATUS_Canceled 0 POS_NAME_CONTRACT_STATUS_Completed 0 POS_NAME_CONTRACT_STATUS_Demand 0 POS_NAME_CONTRACT_STATUS_Returned to the store 0 POS_NAME_CONTRACT_STATUS_Signed 0 POS_NAME_CONTRACT_STATUS_XNA 0 INSTA_NUM_INSTALMENT_VERSION 0 INSTA_NUM_INSTALMENT_NUMBER 0 INSTA_DAYS_INSTALMENT 0 INSTA_DAYS_ENTRY_PAYMENT 0 INSTA_AMT_INSTALMENT 0 INSTA_AMT_PAYMENT 0 CREDIT_MONTHS_BALANCE 0 CREDIT_AMT_BALANCE 0 CREDIT_AMT_CREDIT_LIMIT_ACTUAL 0 CREDIT_AMT_DRAWINGS_ATM_CURRENT 0 CREDIT_AMT_DRAWINGS_CURRENT 0 CREDIT_AMT_DRAWINGS_OTHER_CURRENT 0 
CREDIT_AMT_DRAWINGS_POS_CURRENT 0 CREDIT_AMT_INST_MIN_REGULARITY 0 CREDIT_AMT_PAYMENT_CURRENT 0 CREDIT_AMT_PAYMENT_TOTAL_CURRENT 0 CREDIT_AMT_RECEIVABLE_PRINCIPAL 0 CREDIT_AMT_RECIVABLE 0 CREDIT_AMT_TOTAL_RECEIVABLE 0 CREDIT_CNT_DRAWINGS_ATM_CURRENT 0 CREDIT_CNT_DRAWINGS_CURRENT 0 CREDIT_CNT_DRAWINGS_OTHER_CURRENT 0 CREDIT_CNT_DRAWINGS_POS_CURRENT 0 CREDIT_CNT_INSTALMENT_MATURE_CUM 0 CREDIT_SK_DPD 0 CREDIT_SK_DPD_DEF 0 CREDIT_NAME_CONTRACT_STATUS_Active 0 CREDIT_NAME_CONTRACT_STATUS_Approved 0 CREDIT_NAME_CONTRACT_STATUS_Completed 0 CREDIT_NAME_CONTRACT_STATUS_Demand 0 CREDIT_NAME_CONTRACT_STATUS_Refused 0 CREDIT_NAME_CONTRACT_STATUS_Sent proposal 0 CREDIT_NAME_CONTRACT_STATUS_Signed 0
# Display the merged frame after the mode imputation above
app_bure_prev
| SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 307506 | 456251 | 0 | Cash loans | M | N | N | 0 | 157500.0 | 254700.0 | 27558.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307507 | 456252 | 0 | Cash loans | F | N | Y | 0 | 72000.0 | 269550.0 | 12001.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307508 | 456253 | 0 | Cash loans | F | N | Y | 0 | 153000.0 | 677664.0 | 29979.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307509 | 456254 | 1 | Cash loans | F | N | Y | 0 | 171000.0 | 370107.0 | 20205.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307510 | 456255 | 0 | Cash loans | F | N | N | 0 | 157500.0 | 675000.0 | 49117.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
304531 rows × 329 columns
# Keep only the object-dtype columns and remember their names
object_typed = app_bure_prev.select_dtypes(include=['object'])
categorical_columns = object_typed.columns
# Show which columns were picked up
print(categorical_columns)
Index(['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY',
'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE',
'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE',
'WEEKDAY_APPR_PROCESS_START', 'ORGANIZATION_TYPE',
'EMERGENCYSTATE_MODE'],
dtype='object')
# Count the rows for each value of INCOME_GT_CREDIT_FLAG
app_bure_prev.value_counts('INCOME_GT_CREDIT_FLAG')
INCOME_GT_CREDIT_FLAG False 290440 True 14091 dtype: int64
# Print the distinct values of each listed column, each followed by a
# separator line.
columns_to_inspect = [
    'NAME_CONTRACT_TYPE',
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_TYPE_SUITE',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'WEEKDAY_APPR_PROCESS_START',
    'ORGANIZATION_TYPE',
    'EMERGENCYSTATE_MODE',
    'INCOME_GT_CREDIT_FLAG',
]
for column_name in columns_to_inspect:
    distinct_values = app_bure_prev[column_name].unique()
    print(column_name, distinct_values)
    print('-------------------------------------------------------------------')
NAME_CONTRACT_TYPE ['Cash loans' 'Revolving loans'] ------------------------------------------------------------------- CODE_GENDER ['M' 'F' 'XNA'] ------------------------------------------------------------------- FLAG_OWN_CAR ['N' 'Y'] ------------------------------------------------------------------- FLAG_OWN_REALTY ['Y' 'N'] ------------------------------------------------------------------- NAME_TYPE_SUITE ['Unaccompanied' 'Family' 'Spouse, partner' 'Children' 'Other_A' 'Other_B' 'Group of people'] ------------------------------------------------------------------- NAME_INCOME_TYPE ['Working' 'State servant' 'Commercial associate' 'Pensioner' 'Unemployed' 'Student' 'Businessman' 'Maternity leave'] ------------------------------------------------------------------- NAME_EDUCATION_TYPE ['Secondary / secondary special' 'Higher education' 'Incomplete higher' 'Lower secondary' 'Academic degree'] ------------------------------------------------------------------- NAME_FAMILY_STATUS ['Single / not married' 'Married' 'Civil marriage' 'Widow' 'Separated'] ------------------------------------------------------------------- NAME_HOUSING_TYPE ['House / apartment' 'Rented apartment' 'With parents' 'Municipal apartment' 'Office apartment' 'Co-op apartment'] ------------------------------------------------------------------- OCCUPATION_TYPE ['Laborers' 'Core staff' 'Accountants' 'Managers' 'Drivers' 'Sales staff' 'Cleaning staff' 'Cooking staff' 'Private service staff' 'Medicine staff' 'Security staff' 'High skill tech staff' 'Waiters/barmen staff' 'Low-skill Laborers' 'Realty agents' 'Secretaries' 'IT staff' 'HR staff'] ------------------------------------------------------------------- WEEKDAY_APPR_PROCESS_START ['WEDNESDAY' 'MONDAY' 'THURSDAY' 'SUNDAY' 'SATURDAY' 'FRIDAY' 'TUESDAY'] ------------------------------------------------------------------- ORGANIZATION_TYPE ['Business Entity Type 3' 'School' 'Government' 'Religion' 'Other' 'XNA' 'Electricity' 'Medicine' 
'Business Entity Type 2' 'Self-employed' 'Transport: type 2' 'Construction' 'Housing' 'Kindergarten' 'Trade: type 7' 'Industry: type 11' 'Military' 'Services' 'Security Ministries' 'Transport: type 4' 'Industry: type 1' 'Emergency' 'Security' 'Trade: type 2' 'University' 'Police' 'Business Entity Type 1' 'Postal' 'Transport: type 3' 'Industry: type 4' 'Agriculture' 'Restaurant' 'Culture' 'Hotel' 'Industry: type 7' 'Trade: type 3' 'Industry: type 3' 'Bank' 'Industry: type 9' 'Insurance' 'Trade: type 6' 'Industry: type 2' 'Transport: type 1' 'Industry: type 12' 'Mobile' 'Trade: type 1' 'Industry: type 5' 'Industry: type 10' 'Legal Services' 'Advertising' 'Trade: type 5' 'Cleaning' 'Industry: type 13' 'Trade: type 4' 'Telecom' 'Industry: type 8' 'Realtor' 'Industry: type 6'] ------------------------------------------------------------------- EMERGENCYSTATE_MODE ['No' 'Yes'] ------------------------------------------------------------------- INCOME_GT_CREDIT_FLAG [False True] -------------------------------------------------------------------
from sklearn.preprocessing import LabelEncoder

# Replace every listed column with integer codes.
# One encoder object is refit for each column, so after the loop `le` only
# holds the classes of the last column processed.
le = LabelEncoder()
columns_to_encode = (
    'NAME_CONTRACT_TYPE',
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_TYPE_SUITE',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'WEEKDAY_APPR_PROCESS_START',
    'ORGANIZATION_TYPE',
    'EMERGENCYSTATE_MODE',
    'INCOME_GT_CREDIT_FLAG',
)
for column_name in columns_to_encode:
    encoded = le.fit_transform(app_bure_prev[column_name])
    app_bure_prev[column_name] = encoded

# Preview the frame with the encoded columns
app_bure_prev.head()
| SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100002 | 1 | 0 | 1 | 0 | 1 | 0 | 202500.0 | 406597.5 | 24700.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 100003 | 0 | 0 | 0 | 0 | 0 | 0 | 270000.0 | 1293502.5 | 35698.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 100004 | 0 | 1 | 1 | 1 | 1 | 0 | 67500.0 | 135000.0 | 6750.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 100006 | 0 | 0 | 0 | 0 | 1 | 0 | 135000.0 | 312682.5 | 29686.5 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 100007 | 0 | 0 | 1 | 0 | 1 | 0 | 121500.0 | 513000.0 | 21865.5 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 rows × 329 columns
# Feature matrix: every column except TARGET and SK_ID_CURR
x = app_bure_prev.drop(columns=['TARGET', 'SK_ID_CURR'])
x.head()
| NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | 1 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 1 | 1 | 1 | 1 | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 135000.0 | 312682.5 | 29686.5 | 297000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0 | 1 | 0 | 1 | 0 | 121500.0 | 513000.0 | 21865.5 | 513000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 rows × 327 columns
# Label vector: the TARGET column of the merged frame
y = app_bure_prev['TARGET']
y
0 1
1 0
2 0
3 0
4 0
..
307506 0
307507 0
307508 0
307509 1
307510 0
Name: TARGET, Length: 304531, dtype: int64
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=33,
)
y_test
134941 1
259901 0
213366 0
151480 0
290128 0
..
164694 0
85126 0
198511 1
64162 0
146734 0
Name: TARGET, Length: 76133, dtype: int64
# Display the training feature matrix
x_train
| NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 51515 | 0 | 1 | 1 | 1 | 0 | 180000.0 | 1560726.0 | 43047.0 | 1395000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 240897 | 0 | 1 | 0 | 1 | 0 | 135000.0 | 640080.0 | 29970.0 | 450000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 261925 | 0 | 0 | 0 | 1 | 2 | 135000.0 | 269982.0 | 29205.0 | 238500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 82413 | 0 | 0 | 1 | 1 | 0 | 157500.0 | 534141.0 | 16771.5 | 441000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 214981 | 0 | 0 | 0 | 1 | 0 | 94500.0 | 116892.0 | 5580.0 | 76500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 149001 | 0 | 0 | 1 | 1 | 0 | 450000.0 | 1235587.5 | 40963.5 | 1107000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 198572 | 0 | 1 | 1 | 0 | 0 | 315000.0 | 497520.0 | 39307.5 | 450000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 160464 | 0 | 0 | 0 | 1 | 0 | 112500.0 | 729792.0 | 35109.0 | 630000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 226067 | 1 | 1 | 0 | 1 | 0 | 445500.0 | 427500.0 | 21375.0 | 427500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 105162 | 0 | 0 | 0 | 0 | 0 | 315000.0 | 647046.0 | 17199.0 | 463500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
228398 rows × 327 columns
# Display the training feature matrix (same expression as the preceding cell)
x_train
| NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 51515 | 0 | 1 | 1 | 1 | 0 | 180000.0 | 1560726.0 | 43047.0 | 1395000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 240897 | 0 | 1 | 0 | 1 | 0 | 135000.0 | 640080.0 | 29970.0 | 450000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 261925 | 0 | 0 | 0 | 1 | 2 | 135000.0 | 269982.0 | 29205.0 | 238500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 82413 | 0 | 0 | 1 | 1 | 0 | 157500.0 | 534141.0 | 16771.5 | 441000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 214981 | 0 | 0 | 0 | 1 | 0 | 94500.0 | 116892.0 | 5580.0 | 76500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 149001 | 0 | 0 | 1 | 1 | 0 | 450000.0 | 1235587.5 | 40963.5 | 1107000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 198572 | 0 | 1 | 1 | 0 | 0 | 315000.0 | 497520.0 | 39307.5 | 450000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 160464 | 0 | 0 | 0 | 1 | 0 | 112500.0 | 729792.0 | 35109.0 | 630000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 226067 | 1 | 1 | 0 | 1 | 0 | 445500.0 | 427500.0 | 21375.0 | 427500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 105162 | 0 | 0 | 0 | 0 | 0 | 315000.0 | 647046.0 | 17199.0 | 463500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
228398 rows × 327 columns
# Display the training labels
y_train
51515 0
240897 0
261925 0
82413 0
214981 0
..
149001 0
198572 0
160464 0
226067 0
105162 0
Name: TARGET, Length: 228398, dtype: int64
# Count the rows per class in the full target vector
y.value_counts()
0 279864 1 24667 Name: TARGET, dtype: int64
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column with MinMaxScaler (default range [0, 1]) and
# wrap the resulting array in a DataFrame that keeps the feature names.
# NOTE(review): the scaler is fit on the full matrix `x`, not on x_train
# alone, so held-out rows influence the scaling - confirm this is intended.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(x)
df = pd.DataFrame(data=scaled_data, columns=x.columns)
# head must be called; the bare attribute `df.head` only displays the
# bound-method repr instead of the first five rows.
df.head()
<bound method NDFrame.head of NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY \
0 0.0 0.5 0.0 1.0
1 0.0 0.0 0.0 0.0
2 1.0 0.5 1.0 1.0
3 0.0 0.0 0.0 1.0
4 0.0 0.5 0.0 1.0
... ... ... ... ...
304526 0.0 0.5 0.0 0.0
304527 0.0 0.0 0.0 1.0
304528 0.0 0.0 0.0 1.0
304529 0.0 0.0 0.0 1.0
304530 0.0 0.0 0.0 0.0
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY \
0 0.0 0.001512 0.090287 0.090032
1 0.0 0.002089 0.311736 0.132924
2 0.0 0.000358 0.022472 0.020025
3 0.0 0.000935 0.066837 0.109477
4 0.0 0.000819 0.116854 0.078975
... ... ... ... ...
304526 0.0 0.001127 0.052360 0.101176
304527 0.0 0.000396 0.056067 0.040505
304528 0.0 0.001089 0.157969 0.110618
304529 0.0 0.001243 0.081175 0.072499
304530 0.0 0.001127 0.157303 0.185258
AMT_GOODS_PRICE NAME_TYPE_SUITE ... \
0 0.077441 1.000000 ...
1 0.271605 0.166667 ...
2 0.023569 1.000000 ...
3 0.063973 1.000000 ...
4 0.117845 1.000000 ...
... ... ... ...
304526 0.046016 1.000000 ...
304527 0.046016 1.000000 ...
304528 0.135802 1.000000 ...
304529 0.069585 1.000000 ...
304530 0.158249 1.000000 ...
CREDIT_CNT_INSTALMENT_MATURE_CUM CREDIT_SK_DPD CREDIT_SK_DPD_DEF \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
... ... ... ...
304526 0.0 0.0 0.0
304527 0.0 0.0 0.0
304528 0.0 0.0 0.0
304529 0.0 0.0 0.0
304530 0.0 0.0 0.0
CREDIT_NAME_CONTRACT_STATUS_Active \
0 0.0
1 0.0
2 0.0
3 1.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Approved \
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Completed \
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Demand \
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Refused \
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Sent proposal \
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
CREDIT_NAME_CONTRACT_STATUS_Signed
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
304526 0.0
304527 0.0
304528 0.0
304529 0.0
304530 0.0
[304531 rows x 327 columns]>
from sklearn.decomposition import PCA

# Fit a PCA with no component limit on df to see how the explained variance
# accumulates as components are added.
pca = PCA()
principalcomponents = pca.fit_transform(df)

# explained_variance_ratio_ holds fractions in [0, 1]; scale to percent so the
# plotted values agree with the axis label, and start the x-axis at 1 so it
# reads as a component count rather than a zero-based index.
cumulative_variance_pct = np.cumsum(pca.explained_variance_ratio_) * 100
component_counts = np.arange(1, len(cumulative_variance_pct) + 1)

plt.figure()
plt.plot(component_counts, cumulative_variance_pct)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance (%)')
plt.title('Explained Variance')
plt.show()
# Project df onto its first 100 principal components.
pca = PCA(n_components=100)
new_data = pca.fit_transform(df)

# The previous version passed `columns=print(i)`. print() returns None, so
# pandas silently used its default integer column labels (0..99) and the
# counter loop only printed the numbers 2..100. Ask for the default labels
# directly instead.
principal_df = pd.DataFrame(data=new_data)
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 100
print(np.cumsum(pca.explained_variance_ratio_))
[0.13703072 0.19827163 0.25539076 0.29448893 0.3291062 0.36142485 0.39144063 0.41963488 0.44715696 0.47190124 0.4937025 0.51512512 0.53499643 0.55411458 0.57226237 0.58997943 0.60411718 0.61775697 0.63034134 0.64249376 0.65441404 0.66541721 0.67579626 0.68574853 0.69521506 0.70411474 0.71294788 0.721705 0.73027523 0.7382594 0.74620079 0.75394911 0.76161212 0.76902063 0.7762017 0.78308656 0.7897872 0.79638021 0.80295271 0.80948365 0.81598063 0.82245577 0.82875674 0.83488919 0.84061445 0.84627447 0.85161969 0.85676582 0.86170511 0.86646483 0.87118848 0.87581601 0.88037229 0.88478778 0.88906676 0.89315644 0.89711854 0.90097744 0.90469099 0.90829636 0.91179795 0.91511401 0.91816486 0.9210018 0.92378782 0.9263941 0.92893867 0.93139733 0.93384103 0.93625241 0.93845657 0.94054386 0.9426014 0.94457709 0.94645363 0.94829237 0.9500309 0.95168446 0.95325096 0.95480117 0.95634487 0.95772624 0.95908233 0.96033498 0.96154759 0.96271282 0.96387123 0.96501777 0.96607519 0.96711017 0.96811333 0.96908436 0.97003736 0.97098791 0.97190339 0.97280466 0.97365512 0.97447107 0.97527222 0.97605349]
principal_df
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -1.069993 | 0.182752 | 0.224762 | 0.466566 | -0.296921 | -0.295750 | -0.463716 | -0.513895 | -0.529139 | 0.842591 | ... | 0.124051 | -0.185508 | -0.460209 | -0.058217 | -0.218401 | 0.101643 | -0.099580 | 0.007500 | 0.033038 | -0.292569 |
| 1 | -0.232059 | -0.290004 | -0.151789 | -0.541143 | -0.104399 | -0.504114 | -0.850435 | 0.448569 | -0.018879 | 0.404731 | ... | -0.020579 | -0.024259 | 0.078055 | -0.069768 | 0.013315 | 0.024694 | -0.080420 | -0.023699 | 0.042591 | -0.012710 |
| 2 | -1.577270 | 0.033349 | 0.153514 | -0.562490 | 0.071476 | -0.089890 | 0.600636 | 0.423613 | -0.550142 | 1.043077 | ... | 0.070635 | -0.041994 | -0.077366 | -0.014778 | 0.030544 | 0.072083 | 0.017788 | 0.133722 | -0.194084 | 0.007447 |
| 3 | 1.322030 | 0.276303 | -0.435906 | 1.175515 | 0.303640 | 0.478250 | -0.597051 | -0.083415 | -0.389949 | -0.263187 | ... | 0.014922 | -0.031021 | -0.023674 | 0.020413 | -0.016230 | -0.016487 | 0.033022 | 0.024404 | -0.013820 | 0.042557 |
| 4 | 0.395797 | -0.079751 | -0.457191 | -0.640798 | 0.707315 | -0.687447 | 0.779387 | -0.091725 | 0.665795 | -0.281248 | ... | -0.053203 | 0.069412 | 0.015410 | 0.001196 | -0.053472 | -0.001371 | 0.001292 | -0.042720 | 0.042988 | 0.016247 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 304526 | -1.729526 | 0.280461 | 0.247553 | 0.000323 | 1.068703 | 1.889744 | 0.228819 | 0.178182 | -0.277107 | 0.155246 | ... | -0.009585 | 0.003983 | 0.037711 | 0.010435 | -0.060908 | -0.033467 | -0.033827 | -0.123686 | -0.002520 | -0.065890 |
| 304527 | -1.446905 | -1.583707 | 0.215260 | 0.374959 | 0.461163 | 0.428501 | -1.070779 | 0.541500 | 0.672837 | -0.298517 | ... | 0.027728 | -0.076818 | -0.185081 | -0.027195 | -0.021873 | -0.033492 | 0.001997 | 0.015269 | -0.027229 | 0.045366 |
| 304528 | -1.395311 | 0.298903 | -0.774347 | -1.096497 | 0.236309 | 0.680991 | 0.180618 | -0.927396 | 0.680844 | -0.449020 | ... | -0.018001 | 0.016631 | 0.077625 | 0.043507 | 0.111468 | 0.016865 | -0.113705 | -0.340958 | -0.071870 | 0.042391 |
| 304529 | -1.380803 | 0.119600 | -0.547598 | 0.191041 | -0.086738 | 0.029911 | 0.415059 | -0.914253 | 0.044289 | -0.050822 | ... | -0.030254 | -0.012058 | 0.036630 | -0.015014 | -0.016423 | -0.071066 | -0.016776 | -0.083633 | 0.079677 | -0.027230 |
| 304530 | 0.427929 | 0.787596 | -0.399538 | -0.822034 | 0.535917 | -0.270132 | -0.256005 | 0.195575 | 0.892969 | 1.021583 | ... | 0.004958 | 0.000542 | 0.031326 | 0.024258 | -0.083016 | -0.018877 | 0.006587 | 0.037809 | -0.059083 | -0.042105 |
304531 rows × 100 columns
principal_df.corr()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.000000e+00 | 3.509523e-17 | -6.003601e-17 | -2.603317e-17 | -7.473187e-18 | 1.808072e-17 | 4.411952e-17 | 3.049601e-17 | 1.951003e-18 | -1.012574e-17 | ... | 1.028278e-18 | 3.432784e-18 | 5.814233e-17 | 1.200717e-17 | 9.977098e-18 | -2.770747e-17 | 6.043889e-18 | 4.555241e-17 | 7.089736e-17 | 1.258585e-17 |
| 1 | 3.509523e-17 | 1.000000e+00 | 2.401852e-15 | 2.967990e-16 | 2.936291e-16 | 2.972742e-16 | -1.530355e-16 | 1.050882e-16 | 5.791542e-17 | 5.652962e-17 | ... | 3.354229e-18 | 9.298019e-18 | 2.204460e-17 | 1.406786e-17 | 3.646777e-18 | -7.534634e-18 | -6.999909e-18 | 1.804254e-17 | 2.075264e-18 | -1.664701e-17 |
| 2 | -6.003601e-17 | 2.401852e-15 | 1.000000e+00 | 3.510650e-16 | -6.402128e-16 | -3.058900e-16 | -5.044898e-16 | 1.289581e-16 | 1.439599e-16 | 4.743130e-17 | ... | -5.845439e-18 | -1.180659e-17 | -3.034058e-18 | -4.614028e-18 | -9.276155e-18 | -2.366614e-17 | 1.003848e-18 | -4.714906e-18 | -1.037746e-17 | 2.835443e-17 |
| 3 | -2.603317e-17 | 2.967990e-16 | 3.510650e-16 | 1.000000e+00 | 1.697816e-15 | -6.053957e-16 | -1.910533e-15 | 1.159614e-16 | -1.015989e-15 | 1.470259e-15 | ... | 5.868714e-18 | -1.260149e-17 | -5.202062e-19 | 6.967725e-18 | -6.657985e-18 | 4.120928e-18 | 2.127518e-17 | -2.018742e-17 | -7.589839e-18 | -1.806491e-17 |
| 4 | -7.473187e-18 | 2.936291e-16 | -6.402128e-16 | 1.697816e-15 | 1.000000e+00 | 1.266313e-15 | -1.366309e-15 | -5.357198e-16 | -3.942660e-17 | 1.368680e-15 | ... | 1.752911e-17 | 1.850881e-17 | -2.325193e-17 | 2.027920e-17 | 6.331575e-18 | -1.877933e-17 | -1.358344e-17 | 4.851004e-17 | -8.748591e-18 | 3.858179e-17 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | -2.770747e-17 | -7.534634e-18 | -2.366614e-17 | 4.120928e-18 | -1.877933e-17 | -1.556456e-18 | 1.295660e-17 | 1.455095e-17 | 1.773289e-17 | -5.201982e-19 | ... | 6.748602e-16 | -1.576131e-15 | -8.834171e-16 | -9.493750e-16 | -3.141909e-16 | 1.000000e+00 | -1.569977e-15 | -1.765580e-15 | 1.048564e-15 | -1.565231e-15 |
| 96 | 6.043889e-18 | -6.999909e-18 | 1.003848e-18 | 2.127518e-17 | -1.358344e-17 | 5.438462e-17 | -7.428519e-18 | -1.588946e-17 | -3.398423e-17 | 1.585808e-17 | ... | 8.638014e-16 | -4.000258e-16 | 1.328441e-17 | 1.668062e-16 | -6.943594e-16 | -1.569977e-15 | 1.000000e+00 | 4.594176e-16 | 4.719919e-17 | 9.476219e-16 |
| 97 | 4.555241e-17 | 1.804254e-17 | -4.714906e-18 | -2.018742e-17 | 4.851004e-17 | 1.272123e-17 | 3.673731e-17 | 5.816711e-18 | -1.603250e-17 | 2.275273e-17 | ... | 2.664862e-15 | -4.392981e-17 | 1.582704e-15 | 2.162393e-16 | -1.269384e-17 | -1.765580e-15 | 4.594176e-16 | 1.000000e+00 | -8.744915e-16 | -1.373196e-16 |
| 98 | 7.089736e-17 | 2.075264e-18 | -1.037746e-17 | -7.589839e-18 | -8.748591e-18 | -2.612238e-17 | 7.165160e-19 | 2.854139e-17 | -4.627516e-18 | -7.115592e-19 | ... | -1.071332e-15 | -2.281463e-16 | -3.690612e-16 | -2.371551e-16 | -8.263655e-17 | 1.048564e-15 | 4.719919e-17 | -8.744915e-16 | 1.000000e+00 | -5.009471e-18 |
| 99 | 1.258585e-17 | -1.664701e-17 | 2.835443e-17 | -1.806491e-17 | 3.858179e-17 | 1.955428e-17 | 1.718217e-17 | -2.016645e-17 | 4.436831e-17 | 1.723623e-17 | ... | 8.597012e-16 | 8.644199e-16 | 1.680888e-15 | 4.899231e-16 | 1.697981e-16 | -1.565231e-15 | 9.476219e-16 | -1.373196e-16 | -5.009471e-18 | 1.000000e+00 |
100 rows × 100 columns
# Draw an annotated heatmap of the pairwise correlations between the columns
# of principal_df on a 90x50 figure.
plt.figure(figsize=(90,50))
sns.heatmap(principal_df.corr(),annot=True,cmap='viridis')
<AxesSubplot: >
x=app_bure_prev.drop(['TARGET','SK_ID_CURR'],axis=1)
x
| NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | ... | CREDIT_CNT_INSTALMENT_MATURE_CUM | CREDIT_SK_DPD | CREDIT_SK_DPD_DEF | CREDIT_NAME_CONTRACT_STATUS_Active | CREDIT_NAME_CONTRACT_STATUS_Approved | CREDIT_NAME_CONTRACT_STATUS_Completed | CREDIT_NAME_CONTRACT_STATUS_Demand | CREDIT_NAME_CONTRACT_STATUS_Refused | CREDIT_NAME_CONTRACT_STATUS_Sent proposal | CREDIT_NAME_CONTRACT_STATUS_Signed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | 1 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 1 | 1 | 1 | 1 | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 135000.0 | 312682.5 | 29686.5 | 297000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0 | 1 | 0 | 1 | 0 | 121500.0 | 513000.0 | 21865.5 | 513000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 307506 | 0 | 1 | 0 | 0 | 0 | 157500.0 | 254700.0 | 27558.0 | 225000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307507 | 0 | 0 | 0 | 1 | 0 | 72000.0 | 269550.0 | 12001.5 | 225000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307508 | 0 | 0 | 0 | 1 | 0 | 153000.0 | 677664.0 | 29979.0 | 585000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307509 | 0 | 0 | 0 | 1 | 0 | 171000.0 | 370107.0 | 20205.0 | 319500.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 307510 | 0 | 0 | 0 | 0 | 0 | 157500.0 | 675000.0 | 49117.5 | 675000.0 | 6 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
304531 rows × 327 columns
y=app_bure_prev.TARGET
y
0 1
1 0
2 0
3 0
4 0
..
307506 0
307507 0
307508 0
307509 1
307510 0
Name: TARGET, Length: 304531, dtype: int64
## preparing training and testing data
# Hold out 30% of the rows of x / y for testing; random_state fixes the shuffle.
from sklearn.model_selection import train_test_split
# NOTE(review): the names assigned here end in an underscore (x_train_, ...),
# while the code that follows uses x_train / x_test / y_train / y_test, which
# are not assigned in this part of the file -- confirm which split is intended.
x_train_, x_test_, y_train_, y_test_ = train_test_split(x, y, test_size=0.30, random_state=42)
from sklearn.metrics import accuracy_score, confusion_matrix,precision_score,recall_score,f1_score,classification_report
from sklearn.linear_model import LogisticRegression
# Logistic regression with default settings, fitted on x_train / y_train.
LR=LogisticRegression()
LR.fit(x_train,y_train)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
y_hat_test=LR.predict(x_test)
y_hat_test
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
acc=accuracy_score(y_test,y_hat_test)
acc
0.9188131296546832
print(classification_report(y_test,y_hat_test))
precision recall f1-score support
0 0.92 1.00 0.96 69975
1 0.19 0.00 0.00 6158
accuracy 0.92 76133
macro avg 0.55 0.50 0.48 76133
weighted avg 0.86 0.92 0.88 76133
y_hat_train=LR.predict(x_train)
y_hat_train
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
acc=accuracy_score(y_train,y_hat_train)
acc
0.9187120727852258
print(classification_report(y_train,y_hat_train))
precision recall f1-score support
0 0.92 1.00 0.96 209889
1 0.19 0.00 0.00 18509
accuracy 0.92 228398
macro avg 0.56 0.50 0.48 228398
weighted avg 0.86 0.92 0.88 228398
from collections import Counter

from imblearn.over_sampling import SMOTE

# Seed SMOTE so the synthetic minority samples -- and every metric computed
# from them -- are reproducible between runs.
sm = SMOTE(random_state=42)

# Balance the training data by oversampling the minority class.
x_sm, y_sm = sm.fit_resample(x_train, y_train)

# NOTE(review): the test split is oversampled as well. Scores computed on
# x_sm1 / y_sm1 are therefore measured partly on synthetic rows and are not
# comparable with scores on the untouched x_test / y_test.
x_sm1, y_sm1 = sm.fit_resample(x_test, y_test)

# Class counts of the test labels before and after resampling.
print(Counter(y_test))
print(Counter(y_sm1))
Counter({0: 69975, 1: 6158})
Counter({1: 69975, 0: 69975})
from collections import Counter
print(Counter(y_train))
print(Counter(y_sm))
Counter({0: 209889, 1: 18509})
Counter({0: 209889, 1: 209889})
from sklearn.linear_model import LogisticRegression
LR=LogisticRegression()
LR.fit(x_sm,y_sm)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
y_sm_test=LR.predict(x_sm1)
y_sm_test
array([0, 1, 0, ..., 1, 1, 1], dtype=int64)
acc=accuracy_score(y_sm1,y_sm_test)
acc
0.6238585209003216
print(classification_report(y_sm1,y_sm_test))
precision recall f1-score support
0 0.63 0.60 0.61 69975
1 0.62 0.65 0.63 69975
accuracy 0.62 139950
macro avg 0.62 0.62 0.62 139950
weighted avg 0.62 0.62 0.62 139950
y_sm_train=LR.predict(x_sm)
y_sm_train
array([0, 1, 1, ..., 0, 0, 1], dtype=int64)
acc=accuracy_score(y_sm,y_sm_train)
acc
0.6198847962494461
print(classification_report(y_sm,y_sm_train))
precision recall f1-score support
0 0.62 0.60 0.61 209889
1 0.62 0.64 0.63 209889
accuracy 0.62 419778
macro avg 0.62 0.62 0.62 419778
weighted avg 0.62 0.62 0.62 419778
from sklearn.tree import DecisionTreeClassifier#importing decision tree from sklearn.tree
dt=DecisionTreeClassifier() #object creation for decision tree
dt.fit(x_train,y_train) #training the model
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier()
y_hat_test1=dt.predict(x_test)#prediction
y_hat_test1#predicted values
array([1, 0, 0, ..., 1, 0, 0], dtype=int64)
acc=accuracy_score(y_test,y_hat_test1)
acc
0.8536902525842933
# Report on the decision tree's test predictions (y_hat_test1); y_hat_test
# holds the logistic-regression predictions.
print(classification_report(y_test,y_hat_test1))
precision recall f1-score support
0 0.92 1.00 0.96 69975
1 0.19 0.00 0.00 6158
accuracy 0.92 76133
macro avg 0.55 0.50 0.48 76133
weighted avg 0.86 0.92 0.88 76133
y_hat_train1=dt.predict(x_train)
y_hat_train1
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
# Score the decision tree's own training predictions (y_hat_train1), not the
# logistic-regression predictions stored in y_hat_train.
acc=accuracy_score(y_train,y_hat_train1)
acc
0.9187120727852258
print(classification_report(y_train,y_hat_train1))
precision recall f1-score support
0 1.00 1.00 1.00 209889
1 1.00 1.00 1.00 18509
accuracy 1.00 228398
macro avg 1.00 1.00 1.00 228398
weighted avg 1.00 1.00 1.00 228398
from sklearn.tree import DecisionTreeClassifier#importing decision tree from sklearn.tree
dt=DecisionTreeClassifier() #object creation for decision tree
dt.fit(x_sm,y_sm) #training the model
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier()
y_sm_test1=dt.predict(x_sm1)
y_sm_test1
array([0, 0, 0, ..., 1, 1, 1], dtype=int64)
acc=accuracy_score(y_sm1,y_sm_test1)
acc
0.8546052161486245
print(classification_report(y_sm1,y_sm_test1))
precision recall f1-score support
0 0.84 0.88 0.86 69975
1 0.88 0.83 0.85 69975
accuracy 0.85 139950
macro avg 0.86 0.85 0.85 139950
weighted avg 0.86 0.85 0.85 139950
y_sm_train1=dt.predict(x_sm)
y_sm_train1
array([0, 0, 0, ..., 1, 1, 1], dtype=int64)
acc=accuracy_score(y_sm,y_sm_train1)
acc
1.0
print(classification_report(y_sm,y_sm_train1))
precision recall f1-score support
0 1.00 1.00 1.00 209889
1 1.00 1.00 1.00 209889
accuracy 1.00 419778
macro avg 1.00 1.00 1.00 419778
weighted avg 1.00 1.00 1.00 419778
from sklearn.ensemble import RandomForestClassifier #importing randomforest
rf= RandomForestClassifier()#object creation ,taking 100 decision tree in random forest
rf.fit(x_train,y_train)#training the data
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()
y_predict_test=rf.predict(x_test)#testing
y_predict_test
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
acc=accuracy_score(y_test,y_predict_test)
acc
0.9191809070967911
print(classification_report(y_test,y_predict_test))
precision recall f1-score support
0 0.92 1.00 0.96 69975
1 1.00 0.00 0.00 6158
accuracy 0.92 76133
macro avg 0.96 0.50 0.48 76133
weighted avg 0.93 0.92 0.88 76133
y_predict_train=rf.predict(x_train)#predictions on the training data
y_predict_train
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
acc=accuracy_score(y_train,y_predict_train)
acc
0.9999343251692221
print(classification_report(y_train,y_predict_train))
precision recall f1-score support
0 1.00 1.00 1.00 209889
1 1.00 1.00 1.00 18509
accuracy 1.00 228398
macro avg 1.00 1.00 1.00 228398
weighted avg 1.00 1.00 1.00 228398
from sklearn.ensemble import RandomForestClassifier #importing randomforest
rf= RandomForestClassifier()#object creation ,taking 100 decision tree in random forest
rf.fit(x_sm,y_sm)
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()
y_sm_test2=rf.predict(x_sm1)
y_sm_test2
array([0, 0, 0, ..., 1, 1, 1], dtype=int64)
acc=accuracy_score(y_sm1,y_sm_test2)
acc
0.9267667023937121
print(classification_report(y_sm1,y_sm_test2))
precision recall f1-score support
0 0.88 0.99 0.93 69975
1 0.99 0.86 0.92 69975
accuracy 0.93 139950
macro avg 0.93 0.93 0.93 139950
weighted avg 0.93 0.93 0.93 139950
y_sm_train2=rf.predict(x_sm)
y_sm_train2
array([0, 0, 0, ..., 1, 1, 1], dtype=int64)
acc=accuracy_score(y_sm,y_sm_train2)
acc
1.0
print(classification_report(y_sm,y_sm_train2))
precision recall f1-score support
0 1.00 1.00 1.00 209889
1 1.00 1.00 1.00 209889
accuracy 1.00 419778
macro avg 1.00 1.00 1.00 419778
weighted avg 1.00 1.00 1.00 419778
import xgboost
## model creation
from xgboost import XGBClassifier #importing the model library
xgb=XGBClassifier() ## object creation
xgb.fit(x_train,y_train)
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)
y_hat_test2=xgb.predict(x_test)
y_hat_test2
array([0, 0, 0, ..., 0, 0, 0])
acc=accuracy_score(y_test,y_hat_test2)
acc
0.9185372965731023
print(classification_report(y_test,y_hat_test2))
precision recall f1-score support
0 0.92 0.99 0.96 69975
1 0.47 0.06 0.10 6158
accuracy 0.92 76133
macro avg 0.70 0.53 0.53 76133
weighted avg 0.89 0.92 0.89 76133
y_hat_train2=xgb.predict(x_train)
y_hat_train2
array([0, 0, 0, ..., 0, 0, 0])
acc=accuracy_score(y_train,y_hat_train2)
acc
0.928156113451081
print(classification_report(y_train,y_hat_train2))
precision recall f1-score support
0 0.93 1.00 0.96 209889
1 0.86 0.14 0.23 18509
accuracy 0.93 228398
macro avg 0.89 0.57 0.60 228398
weighted avg 0.92 0.93 0.90 228398
from xgboost import XGBClassifier #importing the model library
xgb=XGBClassifier() ## object creation
xgb.fit(x_sm,y_sm)
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)
y_sm_test4=xgb.predict(x_sm1)
y_sm_test4
array([0, 0, 0, ..., 1, 1, 1])
acc=accuracy_score(y_sm1,y_sm_test4)
acc
0.954076455877099
print(classification_report(y_sm1,y_sm_test4))
precision recall f1-score support
0 0.92 0.99 0.96 69975
1 0.99 0.92 0.95 69975
accuracy 0.95 139950
macro avg 0.96 0.95 0.95 139950
weighted avg 0.96 0.95 0.95 139950
y_sm_train4=xgb.predict(x_sm)
y_sm_train4
array([0, 0, 0, ..., 1, 1, 1])
acc=accuracy_score(y_sm,y_sm_train4)
acc
0.9592618002849125
print(classification_report(y_sm,y_sm_train4))
precision recall f1-score support
0 0.93 1.00 0.96 209889
1 1.00 0.92 0.96 209889
accuracy 0.96 419778
macro avg 0.96 0.96 0.96 419778
weighted avg 0.96 0.96 0.96 419778
Logistic Regression: On the original (imbalanced) test set, the Logistic Regression model achieved an accuracy of 92% and a weighted F1-score of 0.88. These headline figures are driven by the majority class: the model struggled to identify positive instances of class 1, as indicated by a precision of 0.19 and a recall and F1-score of 0.00 for that class.
Decision Tree: The Decision Tree model achieved perfect accuracy and an F1-score of 1.00 on the training data, while its accuracy on the test set was about 85%. This gap points to overfitting to the training data, so it is essential to evaluate its generalization capabilities on unseen data.
Random Forest: The Random Forest model also achieved near-perfect accuracy and an F1-score of 1.00 on the training data. On the original test set it reached 92% accuracy, but with a recall of 0.00 for class 1. Random Forest is known for its ability to handle complex datasets and provide robust results. However, like the Decision Tree, it is necessary to assess its performance on additional data to ensure generalization.
XGBoost (Extreme Gradient Boosting): The XGBoost model achieved an accuracy of 93% and a weighted F1-score of 0.90 on the training data (92% and 0.89 on the test set). Precision and recall were high for class 0, but recall for class 1 was low (0.14 on the training data and 0.06 on the test set), so its F1-score for class 1 was much lower than for class 0.
It is recommended to further investigate the models' performance on unseen data, conduct cross-validation, and consider other relevant aspects specific to your project requirements before making a final decision on the most suitable model.